Permalink
Cannot retrieve contributors at this time
| /*! @file | |
| * @brief | |
| * | |
| * @version 1.0.0 | |
| * | |
| * (C) Copyright 2017 GoPro Inc (http://gopro.com/). | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| * | |
| */ | |
| #include "config.h" | |
| #include "timing.h" | |
| #if WARPSTUFF | |
| #include "WarpLib.h" | |
| #endif | |
| //#include <stdlib.h> | |
| #include <stddef.h> | |
| #include <math.h> | |
| #include <memory.h> | |
| #include <time.h> | |
| //#include <stdint.h> | |
| #ifndef DEBUG | |
| #define DEBUG (1 && _DEBUG) | |
| #endif | |
| #ifndef TIMING | |
| #define TIMING (1 && _TIMING) | |
| #endif | |
| #ifndef XMMOPT | |
| #define XMMOPT (1 && _XMMOPT) | |
| #endif | |
| #define GEN_LICENSE 0 | |
| #ifndef PI | |
| #define PI 3.14159265359f | |
| #endif | |
| #ifdef _WIN32 | |
| #include <windows.h> | |
| #elif __APPLE__ | |
| #include "macdefs.h" | |
| #else | |
| #ifndef ZeroMemory | |
| #define ZeroMemory(p,s) memset(p,0,s) | |
| #endif | |
| #endif | |
| #include <stdio.h> | |
| #include <assert.h> | |
| #include <emmintrin.h> // Intel aligned alloc and free | |
| #include "dump.h" | |
| #include "decoder.h" | |
| #include "codec.h" | |
| #include "vlc.h" | |
| #include "codebooks.h" // References to the codebooks | |
| #include "debug.h" | |
| #include "color.h" // Color formats supported by image processing routines | |
| #include "image.h" | |
| #include "filter.h" | |
| #include "spatial.h" | |
| #include "temporal.h" | |
| //#include "logo40x5.h" | |
| #include "convert.h" | |
| #include "wavelet.h" | |
| #include "bitstream.h" | |
| #include "frame.h" | |
| #include "cpuid.h" | |
| #include "bayer.h" | |
| #include "metadata.h" | |
| #include "DemoasicFrames.h" //TODO: Change filename to lower case | |
| #include "swap.h" | |
| #include "draw.h" | |
| #include "RGB2YUV.h" | |
| #include "lutpath.h" | |
| #include "exception.h" | |
| extern void FastVignetteInplaceWP13(DECODER *decoder, int displayWidth, int width, int height, int y, float r1, float r2, float gain, | |
| int16_t *sptr, int resolution, int pixelsize); | |
| extern void FastSharpeningBlurHinplaceWP13(int width, int16_t *sptr, float sharpness, int resolution, int pixelsize); | |
| extern void FastSharpeningBlurVWP13(short *Aptr, | |
| short *Bptr, | |
| short *Cptr, | |
| short *Dptr, | |
| short *Eptr, | |
| int pitch, | |
| int edgenear, | |
| short *output, | |
| int pixels, | |
| float sharpness, | |
| int resolution, | |
| int channel_blend_type); | |
| extern void FastSharpeningBlurVW13A(short *Aptr, | |
| short *Bptr, | |
| short *Cptr, | |
| short *Dptr, | |
| short *Eptr, | |
| int pitch, | |
| int edgenear, | |
| short *output, | |
| int pixels, | |
| float sharpness, | |
| int resolution, | |
| int channel_blend_type); | |
| #ifdef SPI_LOADER | |
| #include "spi.h" | |
| #include "keyframes.h" | |
| #endif | |
| #ifndef DUMP | |
| #define DUMP (0 && _DUMP) | |
| #endif | |
| #define ERROR_TOLERANT 1 | |
| #if defined(_WIN32) && DEBUG | |
| #include <tchar.h> // For printing debug string in the console window | |
| #endif | |
| #define _DECODE_TRANSFORM 1 // Enable concurrent decoding and inverse transform | |
| #define _TRANSFORM_FIELDPLUS 1 // Use the field plus transform | |
| #if _SIF // In SIF resolution, enable the _DECODE_TRANSFORM switch | |
| #if _DECODE_TRANSFORM == 0 | |
| #define _DECODE_TRANSFORM 1 | |
| #endif | |
| #endif | |
| #ifndef _FSMBUFFER | |
| #define _FSMBUFFER 0 | |
| #endif | |
| // Turn off saturation in this file | |
| #ifdef SATURATE | |
| #undef SATURATE | |
| #endif | |
| #define SATURATE(x) (assert(PIXEL_MIN <= (x) && (x) <= PIXEL_MAX), (x)) | |
| #define SATURATE8S(x) (assert(PIXEL8S_MIN <= (x) && (x) <= PIXEL8S_MAX), (x)) | |
| //#define SATURATE8S(x) SATURATE_8S(x) | |
| //#define SATURATE(x) (x) | |
| // Enable or disable function inlining | |
| #if 1 //DEBUG | |
| #define inline | |
| #else | |
| #define inline __forceinline | |
| #endif | |
| // Pixel size used for computing the compression ratio | |
| #define BITS_PER_PIXEL 8 | |
| // Default processor capabilities | |
| #define DEFAULT_FEATURES (_CPU_FEATURE_MMX ) | |
| #define DEMOSAIC_DELAYLINES 4 | |
| // Forward references | |
| void AllocDecoderGroup(DECODER *decoder); | |
| bool AllocDecoderBuffer(DECODER *decoder, int width, int height, int format); | |
| void EraseDecoderFrames(DECODER *decoder); | |
| TRANSFORM *AllocGroupTransform(GROUP *group, int channel); | |
| void EraseOutputBuffer(uint8_t *buffer, int width, int height, int32_t pitch, int format); | |
| #if _DEBUG | |
| bool DecodeBandFSM16sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, FILE *logfile); | |
| #else | |
| bool DecodeBandFSM16sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch); | |
| #endif | |
| bool DecodeBandFSM16sNoGapHighByte(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, int quant); | |
| bool DecodeBandFSM16sNoGap2Pass(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, int quant); | |
| void CopyLowpassRGB444ToBuffer(DECODER *decoder, IMAGE *image_array[], int num_channels, | |
| uint8_t *output_buffer, int32_t output_pitch, | |
| FRAME_INFO *info, int chroma_offset, | |
| int precision); | |
| extern void Row16uQuarter2OutputFormat(DECODER *decoder, FRAME_INFO *info, int thread_index, | |
| uint8_t *output, int pitch, int frame, void *scratch, size_t scratch_size, int threading, | |
| uint8_t *channeldata[TRANSFORM_MAX_CHANNELS], // used in quarter res decodes | |
| int channelpitch[TRANSFORM_MAX_CHANNELS]); // used in quarter res decodes); | |
| //extern void ComputeCube(DECODER *decoder); | |
| extern bool NeedCube(DECODER *decoder); | |
| extern void LoadTweak(); | |
| //extern int g_topdown; | |
| //extern int g_bottomup; | |
| // Performance measurements | |
| #if _TIMING | |
| extern TIMER tk_decompress; // Timers | |
| extern TIMER tk_decoding; | |
| extern TIMER tk_convert; | |
| extern TIMER tk_inverse; | |
| extern COUNTER decode_byte_count; // Counters | |
| extern COUNTER sample_byte_count; | |
| extern COUNTER alloc_group_count; | |
| extern COUNTER alloc_transform_count; | |
| extern COUNTER alloc_buffer_count; | |
| extern COUNTER spatial_decoding_count; | |
| extern COUNTER temporal_decoding_count; | |
| extern COUNTER progressive_decode_count; | |
| #endif | |
| #if 0 | |
| // Table that maps from decoded format to pixel size | |
| static const int PixelSize[] = | |
| { | |
| 0, // DECODED_FORMAT_UNSUPPORTED | |
| 2, // DECODED_FORMAT_YUYV | |
| 2, // DECODED_FORMAT_UYVY | |
| 2, // DECODED_FORMAT_420 | |
| 4, // DECODED_FORMAT_RGB32 | |
| 3, // DECODED_FORMAT_RGB24 | |
| 2, // DECODED_FORMAT_RGB555 | |
| 2, // DECODED_FORMAT_RGB565 | |
| #if 0 | |
| 2, // DECODED_FORMAT_YUYV_INVERTED | |
| 2, // DECODED_FORMAT_UYVY_INVERTED | |
| 2, // DECODED_FORMAT_420_INVERTED | |
| #endif | |
| 4, // DECODED_FORMAT_RGB32_INVERTED | |
| 3, // DECODED_FORMAT_RGB24_INVERTED | |
| 2, // DECODED_FORMAT_RGB555_INVERTED | |
| 2, // DECODED_FORMAT_RGB565_INVERTED | |
| 3, // DECODED_FORMAT_V210, | |
| 4, // DECODED_FORMAT_YU64, // Custom 16 bits per channel (all data scaled up) YUYV format. | |
| 4, // DECODED_FORMAT_YR16 // Rows of YUV with 16 bits per channel | |
| }; | |
| #if _DEBUG | |
| char *decoded_format_string[] = | |
| { | |
| "Unsupported", | |
| "YUYV", | |
| "UYUV", | |
| "420", | |
| "RGB32", | |
| "RGB24", | |
| "RGB555", | |
| "RGB565", | |
| #if 0 | |
| "YUYV Inverted", | |
| "UYVY Inverted", | |
| "420 Inverted", | |
| #endif | |
| //#if BUILD_PROSPECT | |
| "RGB32 Inverted", | |
| "RGB24 Inverted", | |
| "RGB555 Inverted", | |
| "RGB565 Inverted", | |
| "V210" | |
| //#endif | |
| }; | |
| #endif | |
| #else | |
// Bytes per pixel for the basic color formats, indexed directly by the
// COLOR_FORMAT_* enumeration value.  A zero entry marks a format that has
// no single per-pixel byte size (planar or unknown formats).
static const int pixel_size_table[] =
{
	0,		// COLOR_FORMAT_UNKNOWN
	2,		// COLOR_FORMAT_UYVY
	2,		// COLOR_FORMAT_YUYV
	2,		// COLOR_FORMAT_YVYU
	0,		// COLOR_FORMAT_YV12
	0,		// COLOR_FORMAT_I420
	2,		// COLOR_FORMAT_RGB16
	3,		// COLOR_FORMAT_RGB24
	4,		// COLOR_FORMAT_RGB32
	0,
	3,		// COLOR_FORMAT_V210
	0,		// COLOR_FORMAT_RGB10
	4,		// COLOR_FORMAT_YU64
	4,		// COLOR_FORMAT_YR16
	4,		// COLOR_FORMAT_YUVA
};

// Number of entries in the pixel size lookup table
static const int pixel_size_table_length = sizeof(pixel_size_table)/sizeof(pixel_size_table[0]);
| static int PixelSize(int format) | |
| { | |
| int pixel_size = 0; | |
| // Mask off the other fields in the format descriptor | |
| // Use the lookup table to determine the pixel size (if possible) | |
| if (0 <= format && format < pixel_size_table_length) | |
| { | |
| pixel_size = pixel_size_table[format]; | |
| //return pixel_size; | |
| } | |
| //TODO: Change the rest of this routine into one big switch statement | |
| // Is this an Avid format? | |
| else if (COLOR_FORMAT_AVID <= format && format <= COLOR_FORMAT_AVID_END) | |
| { | |
| switch (format) | |
| { | |
| case COLOR_FORMAT_CbYCrY_8bit: | |
| case COLOR_FORMAT_CbYCrY_10bit_2_8: // Only valid for the lower plane | |
| pixel_size = 1; | |
| break; | |
| case COLOR_FORMAT_CbYCrY_16bit: | |
| case COLOR_FORMAT_CbYCrY_16bit_2_14: | |
| case COLOR_FORMAT_CbYCrY_16bit_10_6: | |
| pixel_size = 2; | |
| break; | |
| default: | |
| assert(0); | |
| pixel_size = 2; // Assume 16 bits per pixel if the format is unknown | |
| break; | |
| } | |
| } | |
| // Is this a Bayer format? | |
| else if (COLOR_FORMAT_BAYER <= format && format <= COLOR_FORMAT_BAYER_END) | |
| { | |
| pixel_size = (format - 100); | |
| if(pixel_size > 2) | |
| pixel_size = 2; | |
| } | |
| else if (format == COLOR_FORMAT_RG48) | |
| pixel_size = 6; | |
| else if (format == COLOR_FORMAT_RG64) | |
| pixel_size = 8; | |
| else if (format == COLOR_FORMAT_B64A) { | |
| pixel_size = 8; | |
| } | |
| return pixel_size; | |
| } | |
| #endif | |
| int DecodedPixelSize(DECODED_FORMAT format) | |
| { | |
| int pixel_size = 0; | |
| // Compute the pixel size | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_YUYV: | |
| pixel_size = 2; | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| pixel_size = 4; | |
| break; | |
| case DECODED_FORMAT_RG48: | |
| pixel_size = 6; | |
| break; | |
| case DECODED_FORMAT_CT_UCHAR: | |
| pixel_size = 2; | |
| break; | |
| case DECODED_FORMAT_CT_SHORT: | |
| case DECODED_FORMAT_CT_SHORT_2_14: | |
| case DECODED_FORMAT_CT_USHORT_10_6: | |
| pixel_size = 4; | |
| break; | |
| case DECODED_FORMAT_CT_10Bit_2_8: | |
| case DECODED_FORMAT_V210: | |
| // This routine should not be called to compute the pixel sizes for these formats | |
| assert(0); | |
| return 0; | |
| break; | |
| case DECODED_FORMAT_ROW16U: | |
| pixel_size = 4; | |
| break; | |
| default: | |
| assert(0); | |
| return 0; | |
| break; | |
| } | |
| return pixel_size; | |
| } | |
| #if 0 | |
| // Convert FOURCC code to a string | |
| static void str4cc(char *string, uint32_t marker) | |
| { | |
| char *p = (char *)&marker + 3; | |
| char *s = string; | |
| int i; | |
| for (i = 0; i < 4; i++) | |
| *(s++) = *(p--); | |
| *s = '\0'; | |
| } | |
| #endif | |
| void GetDisplayAspectRatio(DECODER *decoder, int *w, int *h) | |
| { | |
| int origw,origh, guess = 0; | |
| origw = decoder->frame.width; | |
| origh = decoder->frame.height; | |
| switch(decoder->frame.resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL: | |
| break; | |
| case DECODED_RESOLUTION_HALF: | |
| origw *= 2; | |
| origh *= 2; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| origw *= 4; | |
| origh *= 4; | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| origw *= 8; | |
| origh *= 8; | |
| break; | |
| case DECODED_RESOLUTION_FULL_DEBAYER: | |
| break; | |
| case DECODED_RESOLUTION_HALF_NODEBAYER: | |
| origw *= 2; | |
| origh *= 2; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER_NODEBAYER_SCALED: | |
| origw *= 4; | |
| origh *= 4; | |
| break; | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| //origw *= 2; //DAN20110129 -- seems the width has been corrected elsewhere or was never halved. | |
| break; | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL: | |
| origw *= 2; | |
| break; | |
| case DECODED_RESOLUTION_HALF_VERTICAL: | |
| origh *= 2; | |
| break; | |
| } | |
| if(decoder->codec.picture_aspect_x <= 0 || decoder->codec.picture_aspect_y <= 0) | |
| guess = 1; | |
| // if guess default values, we can't trust them | |
| if(decoder->codec.picture_aspect_x == 16 && decoder->codec.picture_aspect_y == 9) | |
| guess = 1; | |
| if(decoder->pixel_aspect_x && decoder->pixel_aspect_y) | |
| { | |
| int j,den,num; | |
| decoder->codec.picture_aspect_x = num = (origw * decoder->pixel_aspect_x) / decoder->pixel_aspect_y; | |
| decoder->codec.picture_aspect_y = den = origh; | |
| for(j=2; j<num+den; j++) | |
| { | |
| while(num == (num/j)*j && den == (den/j)*j) | |
| { | |
| num /= j; | |
| den /= j; | |
| } | |
| } | |
| decoder->codec.picture_aspect_x = num; | |
| decoder->codec.picture_aspect_y = den; | |
| guess = 0; | |
| } | |
| if(guess) | |
| { | |
| if(origw > 720) //HD. | |
| { | |
| if(origh == 1080) | |
| { | |
| if(origw == 2048) | |
| *w=origw,*h=origh; | |
| else | |
| *w=16,*h=9; // assume 16x9 | |
| } | |
| else if(origh == 720) | |
| { | |
| *w=16,*h=9; // assume 16x9 | |
| } | |
| else | |
| { | |
| *w=origw,*h=origh; // assume square pixel. | |
| } | |
| } | |
| else | |
| { | |
| if(origh == 720) | |
| { | |
| *w=16,*h=9; // assume 16x9 | |
| } | |
| else | |
| { | |
| *w=origw,*h=origh; // assume square pixel. | |
| } | |
| } | |
| } | |
| else | |
| { | |
| *w=decoder->codec.picture_aspect_x; | |
| *h=decoder->codec.picture_aspect_y; | |
| } | |
| } | |
| bool IsValidFrameResolution(int resolution) | |
| { | |
| switch (resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL: | |
| case DECODED_RESOLUTION_HALF: | |
| case DECODED_RESOLUTION_QUARTER: | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| return true; | |
| default: | |
| return false; | |
| } | |
| } | |
// Return true if this decoder can decode to quarter resolution
// (always enabled in this build; the decoder argument is not examined)
bool IsQuarterResolutionEnabled(DECODER *decoder)
{
	return true;
}
// Return the size in bytes of the decoder data structure so callers can
// allocate a decoder without knowing its internal layout
size_t DecoderSize()
{
	return sizeof(DECODER);
}
// Initialize a decoder that the caller has already allocated.
//
// Clears the entire decoder structure (preserving the thread control
// parameters when the caller set them beforehand, as the DirectShow
// interface does), installs the decoding codebooks from the supplied
// codeset array (or from the default codeset cs9 when cs is NULL), and
// resets the codec state and scratch buffer descriptor.  The logfile may
// be NULL; it is only used for status output during decoding.
void InitDecoder(DECODER *decoder, FILE *logfile, CODESET *cs)
{
#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "InitDecoder, decoder: 0x%p\n", decoder);
	}
#endif

	{
		//TODO: Clear the decoder before setting the CPU limit and affinity
		int i;
		//int thread_limit=0, thread_affinity=0, set_thread_params=0, capabilities=0;

		// Save key params so they survive the memset below
		Thread_cntrl saved_params = decoder->thread_cntrl;

		// Clear everything
		memset(decoder, 0, sizeof(DECODER));

		// Restore key params
		if(saved_params.set_thread_params == 1) // used by the DShow Interface
		{
			decoder->thread_cntrl = saved_params;
		}

#if _TIMING
		InitTiming();
#endif

		// Set the file for status information during decoding
		decoder->logfile = logfile;

		// Initialize the decoding error to no error
		decoder->error = CODEC_ERROR_OKAY;

		// Most recent marker found during decoding
		decoder->marker = 0;

		// Count of frames decoded
		decoder->frame_count = 0;

		// Set the codebooks that will be used for decoding
		if (cs != NULL)
		{
			// Use the codeset provided in the call
			for(i=0; i<CODEC_NUM_CODESETS; i++)
			{
				// Codebook for decoding highpass coefficients
				decoder->magsbook[i] = cs[i].magsbook;

				// Codebook for decoding runs of coefficients
				decoder->runsbook[i] = cs[i].runsbook;

				// Lookup table for fast codebook search
				decoder->fastbook[i] = cs[i].fastbook;
			}
		}
		else
		{
			// Use the default codeset
			// NOTE(review): only codeset zero is filled here, while the
			// branch above fills all CODEC_NUM_CODESETS entries -- presumably
			// the default path never consults the other codesets; confirm.
			decoder->magsbook[0] = cs9.magsbook;
			decoder->runsbook[0] = cs9.runsbook;
			decoder->fastbook[0] = cs9.fastbook;
		}

		// Initialize the codec state
		InitCodecState(&decoder->codec);

		// The scratch buffer is assigned later, when decoding begins
		InitScratchBuffer(&decoder->scratch, NULL, 0);

#if _DUMP
		// Initialize the descriptor for controlling debug output
		decoder->dump.enabled = false;
		decoder->dump.channel_mask = 0;
		decoder->dump.wavelet_mask = 0;
		memset(decoder->dump.directory, 0, sizeof(decoder->dump.directory));
		memset(decoder->dump.filename, 0, sizeof(decoder->dump.filename));
#endif
	}

	//REDTEST
	decoder->frm = 0;
	decoder->run = 1;

#if _ALLOCATOR
	decoder->allocator = NULL;
#endif

	decoder->initialized = 1; //DAN20060912
}
| void InitDecoderLicense(DECODER *decoder, const unsigned char *licensekey) | |
| { | |
| if (decoder && licensekey) | |
| { | |
| const unsigned char unlicensed[16] = {0}; | |
| //memset(unlicensed, 0, sizeof(unlicensed)); | |
| // Has the license been set? | |
| if (memcmp(decoder->licensekey, unlicensed, sizeof(decoder->licensekey)) == 0) | |
| { | |
| // Copy the license into the decoder | |
| memcpy(decoder->licensekey, licensekey, sizeof(decoder->licensekey)); | |
| } | |
| } | |
| } | |
| // Free data allocated within the decoder | |
| void ClearDecoder(DECODER *decoder) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| // Free the transforms allocated in the decoder | |
| int i; | |
| if(decoder->initialized == 0) | |
| return; // nothing to free //DAN20060912 | |
| #if _GRAPHICS | |
| DrawClose(decoder); | |
| #endif | |
| for(i=0; i<=METADATA_PRIORITY_MAX; i++) | |
| { | |
| if(decoder->DataBases[i]) | |
| { | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->DataBases[i]); | |
| #else | |
| MEMORY_FREE(decoder->DataBases[i]); | |
| #endif | |
| decoder->DataBases[i] = NULL; | |
| decoder->DataBasesSize[i] = 0; | |
| decoder->DataBasesAllocSize[i] = 0; | |
| } | |
| } | |
| if(decoder->sqrttable) | |
| { | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->sqrttable); | |
| #else | |
| MEMORY_FREE(decoder->sqrttable); | |
| #endif | |
| decoder->sqrttable = NULL; | |
| } | |
| for (i = 0; i < TRANSFORM_MAX_CHANNELS; i++) | |
| { | |
| #if _ALLOCATOR | |
| FreeTransform(allocator, decoder->transform[i]); | |
| #else | |
| FreeTransform(decoder->transform[i]); | |
| #endif | |
| decoder->transform[i] = NULL; | |
| } | |
| if(decoder->aligned_sample_buffer) | |
| { | |
| #if _ALLOCATOR | |
| FreeAligned(decoder->allocator, decoder->aligned_sample_buffer); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->aligned_sample_buffer); | |
| #endif | |
| decoder->aligned_sample_buffer = NULL; | |
| decoder->aligned_sample_buffer_size = 0; | |
| } | |
| if(decoder->tools) | |
| { | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->tools); | |
| #else | |
| MEMORY_FREE(decoder->tools); | |
| #endif | |
| decoder->tools = NULL; | |
| } | |
| // Free the buffer allocated for decoding | |
| if (decoder->buffer != NULL) | |
| { | |
| #if DEBUG_BUFFER_USAGE | |
| int i; | |
| char *ptr = (char *)decoder->buffer; | |
| FILE *fp = fopen("C:/free.txt", "a"); | |
| fprintf(fp, "decoder->buffer = %08x buffer_size = %d\n", decoder->buffer ,decoder->buffer_size); | |
| i = decoder->buffer_size-1; | |
| while(ptr[i] == 1) i--; | |
| fprintf(fp, "used %2.3f percent\n", 100.0*(float)i/(float)decoder->buffer_size); | |
| fclose(fp); | |
| #endif | |
| #if _ALLOCATOR | |
| FreeAligned(allocator, decoder->buffer); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->buffer); | |
| #endif | |
| decoder->buffer = NULL; | |
| decoder->buffer_size = 0; | |
| // Clear the fields in the scratch buffer descriptor | |
| memset(&decoder->scratch, 0, sizeof(SCRATCH)); | |
| // Eventually the buffer and buffer size fields will be obsolete | |
| } | |
| for(i=0;i<_MAX_CPUS;i++) | |
| { | |
| if(decoder->threads_buffer[i]) | |
| { | |
| #if _ALLOCATOR | |
| FreeAligned(decoder->allocator, decoder->threads_buffer[i]); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->threads_buffer[i]); | |
| #endif | |
| decoder->threads_buffer[i] = NULL; | |
| } | |
| } | |
| decoder->threads_buffer_size = 0; | |
| // Do not attempt to free the codebooks since the | |
| // codebook pointers are references to static tables | |
| // Can free some of the data structures allocated by the decoder | |
| FreeCodebooks(decoder); | |
| #if _INTERLACED_WORKER_THREADS | |
| if(decoder->interlaced_worker.lock_init) // threads started | |
| { | |
| int i; | |
| // Signal this thread to stop | |
| SetEvent(decoder->interlaced_worker.stop_event); | |
| // Free all handles used by the worker threads | |
| for(i=0; i<THREADS_IN_LAST_WAVELET; i++) | |
| { | |
| WaitForSingleObject(decoder->interlaced_worker.handle[i], INFINITE); //JY20080307 | |
| CloseHandle(decoder->interlaced_worker.handle[i]); | |
| CloseHandle(decoder->interlaced_worker.start_event[i]); | |
| CloseHandle(decoder->interlaced_worker.done_event[i]); | |
| } | |
| CloseHandle(decoder->interlaced_worker.row_semaphore); | |
| CloseHandle(decoder->interlaced_worker.stop_event); | |
| for(i=0; i<THREADS_IN_LAST_WAVELET; i++) | |
| { | |
| decoder->interlaced_worker.handle[i] = 0; | |
| decoder->interlaced_worker.start_event[i] = 0; | |
| decoder->interlaced_worker.done_event[i] = 0; | |
| } | |
| decoder->interlaced_worker.row_semaphore = 0; | |
| decoder->interlaced_worker.stop_event = 0; | |
| } | |
| // Free the critical section used by the worker threads | |
| DeleteCriticalSection(&decoder->interlaced_worker.lock); | |
| decoder->interlaced_worker.lock_init = 0; | |
| #endif | |
| #if _THREADED | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->entropy_worker_new.pool); | |
| DeleteLock(&decoder->entropy_worker_new.lock); | |
| } | |
| if(decoder->worker_thread.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->worker_thread.pool); | |
| DeleteLock(&decoder->worker_thread.lock); | |
| } | |
| if(decoder->draw_thread.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->draw_thread.pool); | |
| DeleteLock(&decoder->draw_thread.lock); | |
| } | |
| /* | |
| if(decoder->qt_convert_worker.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->qt_convert_worker.pool); | |
| DeleteLock(&decoder->qt_convert_worker.lock); | |
| } | |
| if(decoder->qt_scale_worker.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->qt_scale_worker.pool); | |
| DeleteLock(&decoder->qt_scale_worker.lock); | |
| } | |
| */ | |
| if(decoder->parallelDecoder) | |
| { | |
| if(decoder->parallelDecoder->decoder_thread.pool.thread_count) | |
| { | |
| ThreadPoolDelete(&decoder->parallelDecoder->decoder_thread.pool); | |
| DeleteLock(&decoder->parallelDecoder->decoder_thread.lock); | |
| decoder->parallelDecoder->decoder_thread.pool.thread_count = 0; | |
| } | |
| ClearDecoder(decoder->parallelDecoder); | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->parallelDecoder); | |
| #else | |
| MEMORY_FREE(decoder->parallelDecoder); | |
| #endif | |
| decoder->parallelDecoder = NULL; | |
| } | |
| #endif | |
| //MEMORY_ALIGNED_FREE(RawBayer16); | |
| #if _ALLOCATOR | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = 0; | |
| decoder->RGBFilterBufferSize = 0; | |
| } | |
| if(decoder->RawBayer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RawBayer16); | |
| decoder->RawBayer16 = 0; | |
| decoder->RawBayerSize = 0; | |
| } | |
| if(decoder->StereoBuffer) | |
| { | |
| FreeAligned(decoder->allocator, decoder->StereoBuffer); | |
| decoder->StereoBuffer = 0; | |
| decoder->StereoBufferSize = 0; | |
| } | |
| if(decoder->RawCube) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RawCube); | |
| decoder->RawCube = 0; | |
| } | |
| if(decoder->Curve2Linear) | |
| { | |
| FreeAligned(decoder->allocator, decoder->Curve2Linear); | |
| decoder->Curve2Linear = 0; | |
| } | |
| if(decoder->Linear2CurveRed) | |
| { | |
| FreeAligned(decoder->allocator, decoder->Linear2CurveRed); | |
| decoder->Linear2CurveRed = NULL; | |
| } | |
| if(decoder->Linear2CurveGrn) | |
| { | |
| FreeAligned(decoder->allocator, decoder->Linear2CurveGrn); | |
| decoder->Linear2CurveGrn = NULL; | |
| } | |
| if(decoder->Linear2CurveBlu) | |
| { | |
| FreeAligned(decoder->allocator, decoder->Linear2CurveBlu); | |
| decoder->Linear2CurveBlu = NULL; | |
| } | |
| if(decoder->BYR4LinearRestore) | |
| { | |
| FreeAligned(decoder->allocator, decoder->BYR4LinearRestore); | |
| decoder->BYR4LinearRestore = NULL; | |
| } | |
| if(decoder->GammaContrastRed) | |
| { | |
| FreeAligned(decoder->allocator, decoder->GammaContrastRed); | |
| decoder->GammaContrastRed = NULL; | |
| } | |
| if(decoder->GammaContrastGrn) | |
| { | |
| FreeAligned(decoder->allocator, decoder->GammaContrastGrn); | |
| decoder->GammaContrastGrn = NULL; | |
| } | |
| if(decoder->GammaContrastBlu) | |
| { | |
| FreeAligned(decoder->allocator, decoder->GammaContrastBlu); | |
| decoder->GammaContrastBlu = NULL; | |
| } | |
| //3d LUT | |
| { | |
| if(decoder->LUTcache) | |
| Free(decoder->allocator, decoder->LUTcache); | |
| decoder->LUTcache = NULL; | |
| decoder->LUTcacheCRC = 0; | |
| } | |
| #if WARPSTUFF | |
| { | |
| if (decoder->lens_correct_buffer) | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->lens_correct_buffer); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->lens_correct_buffer); | |
| #endif | |
| if (decoder->mesh) | |
| geomesh_destroy(decoder->mesh); | |
| decoder->lastLensOffsetX = 0; | |
| decoder->lastLensOffsetY = 0; | |
| decoder->lastLensOffsetZ = 0; | |
| decoder->lastLensOffsetR = 0; | |
| decoder->lastLensZoom = 0; | |
| decoder->lastLensFishFOV = 0; | |
| decoder->lastLensGoPro = 0; | |
| decoder->lastLensSphere = 0; | |
| decoder->lastLensFill = 0; | |
| decoder->lastLensStyleSel = 0; | |
| memset(decoder->lastLensCustomSRC, 0, sizeof(decoder->lastLensCustomSRC)); | |
| memset(decoder->lastLensCustomDST, 0, sizeof(decoder->lastLensCustomDST)); | |
| decoder->mesh = NULL; | |
| decoder->lens_correct_buffer = NULL; | |
| } | |
| #endif | |
| if(decoder->overrideData) | |
| { | |
| Free(decoder->allocator, decoder->overrideData); | |
| decoder->overrideData = NULL; | |
| decoder->overrideSize = 0; | |
| } | |
| for(i=0; i<64; i++) | |
| { | |
| if(decoder->mdc[i]) | |
| Free(decoder->allocator, decoder->mdc[i]); | |
| decoder->mdc[i] = NULL; | |
| decoder->mdc_size[i] = 0; | |
| } | |
| #else | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| } | |
| if(decoder->RawBayer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RawBayer16); | |
| decoder->RawBayer16 = NULL; | |
| } | |
| if(decoder->StereoBuffer) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->StereoBuffer); | |
| decoder->StereoBuffer = NULL; | |
| decoder->StereoBufferSize = 0; | |
| } | |
| if(decoder->RawCube) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RawCube); | |
| decoder->RawCube = NULL; | |
| } | |
| if(decoder->Curve2Linear) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->Curve2Linear); | |
| decoder->Curve2Linear = NULL; | |
| } | |
| if(decoder->BYR4LinearRestore) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->BYR4LinearRestore); | |
| decoder->BYR4LinearRestore = NULL; | |
| } | |
| if(decoder->Linear2CurveRed) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->Linear2CurveRed); | |
| decoder->Linear2CurveRed = NULL; | |
| } | |
| if(decoder->Linear2CurveGrn) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->Linear2CurveGrn); | |
| decoder->Linear2CurveGrn = NULL; | |
| } | |
| if(decoder->Linear2CurveBlu) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->Linear2CurveBlu); | |
| decoder->Linear2CurveBlu = NULL; | |
| } | |
| if(decoder->GammaContrastRed) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->GammaContrastRed); | |
| decoder->GammaContrastRed = NULL; | |
| } | |
| if(decoder->GammaContrastGrn) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->GammaContrastGrn); | |
| decoder->GammaContrastGrn = NULL; | |
| } | |
| if(decoder->GammaContrastBlu) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->GammaContrastBlu); | |
| decoder->GammaContrastBlu = NULL; | |
| } | |
| //3d LUT | |
| { | |
| if(decoder->LUTcache) | |
| MEMORY_FREE(decoder->LUTcache); | |
| decoder->LUTcache = NULL; | |
| decoder->LUTcacheCRC = 0; | |
| } | |
| #if WARPSTUFF | |
| { | |
| if (decoder->lens_correct_buffer) | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->lens_correct_buffer); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->lens_correct_buffer); | |
| #endif | |
| if (decoder->mesh) | |
| geomesh_destroy(mesh); | |
| decoder->mesh = NULL; | |
| decoder->lens_correct_buffer = NULL; | |
| decoder->lastLensOffsetX = 0; | |
| decoder->lastLensOffsetY = 0; | |
| decoder->lastLensOffsetZ = 0; | |
| decoder->lastLensOffsetR = 0; | |
| decoder->lastLensZoom = 0; | |
| decoder->lastLensFishFOV = 0; | |
| decoder->lastLlensGoPro = 0; | |
| decoder->lastLlensSphere = 0; | |
| decoder->lastLlensFill = 0; | |
| decoder->lastLlensStyleSel = 0; | |
| memset(decoder->lastLensCustomSRC, 0, sizeof(decoder->lastLensCustomSRC)); | |
| memset(decoder->lastLensCustomDST, 0, sizeof(decoder->lastLensCustomDST)); | |
| } | |
| #endif | |
| if(decoder->overrideData) | |
| { | |
| MEMORY_FREE(decoder->overrideData); | |
| decoder->overrideData = NULL; | |
| decoder->overrideSize = 0; | |
| } | |
| for(i=0; i<64; i++) | |
| { | |
| if(decoder->mdc[i]) | |
| MEMORY_FREE(decoder->mdc[i]); | |
| decoder->mdc[i] = NULL; | |
| decoder->mdc_size[i] = 0; | |
| } | |
| #endif | |
| #ifdef SPI_LOADER | |
| SPIReleaseAll(decoder); | |
| //KeyframesReleaseAll(decoder); | |
| #endif | |
| decoder->initialized = 0;// cleared | |
| } | |
// Shut down the decoder by freeing all data allocated within it.
// The logfile (if any) is intentionally left open; the caller decides
// whether and when to close it.
void ExitDecoder(DECODER *decoder)
{
	// Let the caller keep the logfile open or choose to close it
	//if (logfile) fclose(logfile);

	// Free data allocated within the decoder
	ClearDecoder(decoder);
}
| // Allocate the data structures for decoding a group | |
| void AllocDecoderGroup(DECODER *decoder) | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int num_channels = codec->num_channels;//DAN07022004 | |
| int channel; | |
| assert(decoder->codec.num_channels <= TRANSFORM_MAX_CHANNELS); //DAN07022004 | |
| for (channel = 0; channel < TRANSFORM_MAX_CHANNELS; channel++)//DAN07022004 | |
| { | |
| TRANSFORM *transform = decoder->transform[channel]; | |
| // Need to allocate a transform data structure? | |
| if (transform == NULL) { | |
| #if _ALLOCATOR | |
| transform = (TRANSFORM *)Alloc(allocator, sizeof(TRANSFORM)); | |
| #else | |
| transform = (TRANSFORM *)MEMORY_ALLOC(sizeof(TRANSFORM)); | |
| #endif | |
| assert(transform != NULL); | |
| if (transform == NULL) { | |
| decoder->error = CODEC_ERROR_TRANSFORM_MEMORY; | |
| return; | |
| } | |
| memset(transform, 0, sizeof(TRANSFORM)); | |
| decoder->transform[channel] = transform; | |
| #if _TIMING | |
| alloc_transform_count++; | |
| #endif | |
| } | |
| } | |
| } | |
| // Allocate the buffer used for intermediate results during decoding | |
// Allocate (or reuse) the intermediate decoding buffer and the per-thread
// scratch buffers.  The buffer size is derived from the frame width, the
// output format, and the CPU count.  Returns false only on allocation failure.
bool AllocDecoderBuffer(DECODER *decoder, int width, int height, int format)
{
	int cpus;
	size_t size;
	size_t row_size;
	char *buffer;

#if 0
	// Allocate a buffer large enough for six rows of cache lines
	size = width * sizeof(PIXEL);
	size = ALIGN(size, _CACHE_LINE_SIZE);
	size = 2 * TRANSFORM_MAX_CHANNELS * size;
#else
	// Allocate a buffer large enough for nine rows of cache lines
	size = width * sizeof(PIXEL) * 4;
	size = ALIGN(size, _CACHE_LINE_SIZE);
	size = 3 * TRANSFORM_MAX_CHANNELS * size;
#endif

	// Some output formats need extra scratch rows on top of the base size
	switch (format)
	{
	case DECODED_FORMAT_V210:
	case DECODED_FORMAT_YU64:
		// Increase the buffer size for decoding to the V210 format
		row_size = 4 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 4 * 2 * row_size;
		break;

	case DECODED_FORMAT_YR16:
	case DECODED_FORMAT_CbYCrY_10bit_2_8:
	case DECODED_FORMAT_CbYCrY_16bit_2_14:
	case DECODED_FORMAT_CbYCrY_16bit_10_6:
		// Increase the buffer size for decoding to the YUV16 format
		row_size = 4 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 8 * 2 * row_size;
		break;

	case DECODED_FORMAT_RG48:
	case DECODED_FORMAT_WP13:
		// Increase the buffer size for decoding to 16-bit RGB formats
		row_size = 6 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 12 * 2 * row_size;
		break;

	case DECODED_FORMAT_RG64:
		// Increase the buffer size for decoding to 16-bit RGBA formats
		row_size = 8 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 16 * 2 * row_size;
		break;

	case DECODED_FORMAT_BYR3:
		// Increase the buffer size for decoding to the BYR3 format
		row_size = 2 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 4 * 2 * row_size;
		break;

	case DECODED_FORMAT_BYR4:
		// Increase the buffer size for decoding to the BYR4 format
		row_size = 2 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 4 * 2 * row_size;
		break;

	case DECODED_FORMAT_B64A:
	case DECODED_FORMAT_W13A:
		// Increase the buffer size for decoding to the B64A format
		row_size = 8 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 16 * 2 * row_size;
		break;

	default:
		// Increase the buffer size for YUV to RGB conversion
		row_size = 3 * width * sizeof(PIXEL);
		row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
		size += 2 * 2 * row_size;
		break;
	}

	// The CPU count is stored in the high 16 bits of the capabilities word
	cpus = decoder->thread_cntrl.capabilities >> 16;
	// More worker threads need proportionally more scratch space
	if (cpus > 4)
		size *= 4;
	if (cpus > 16)	//DAN20120803 -- 4444 clips
		size *= 2;

	// Has a buffer already been allocated?
	if (decoder->buffer != NULL)
	{
		// Is the buffer large enough?
		if (decoder->buffer_size < size)
		{
			// Free the previous buffer
#if _ALLOCATOR
			FreeAligned(decoder->allocator, decoder->buffer);
#else
			MEMORY_ALIGNED_FREE(decoder->buffer);
#endif
			decoder->buffer = NULL;
			decoder->buffer_size = 0;
		}
		else
		{
			// The existing buffer can be reused as is
			return true;
		}
	}

	buffer = decoder->buffer;
	if (buffer == NULL)
	{
		// Allocate the decoding buffer (cache-line aligned)
#if _ALLOCATOR
		buffer = (char *)AllocAligned(decoder->allocator, size, _CACHE_LINE_SIZE);
#else
		buffer = (char *)MEMORY_ALIGNED_ALLOC(size, _CACHE_LINE_SIZE);
#endif
		if (buffer == NULL)
		{
			return false;
		}
	}

#if DEBUG_BUFFER_USAGE
	memset(buffer, 1, size);
#endif

	// Save the buffer and its size in the decoder
	decoder->buffer = buffer;
	decoder->buffer_size = size;

	// Initialize the scratch space descriptor
	InitScratchBuffer(&decoder->scratch, buffer, size);

	// allocate buffer for each debayer/color formating thread
	{
		int i;
		size = (width+16)*3*2*4*2*4;	// sixteen lines
		// Square or tall images were running out of scratch space for zooms
		if (height*4 > width*3)
			size *= 1 + ((height+(width/2))/width);

		if (decoder->threads_buffer_size < size)
		{
			// Existing per-thread buffers are too small; free them all
			for (i = 0; i < _MAX_CPUS; i++)
			{
				if (decoder->threads_buffer[i])
				{
#if _ALLOCATOR
					FreeAligned(decoder->allocator, decoder->threads_buffer[i]);
#else
					MEMORY_ALIGNED_FREE(decoder->threads_buffer[i]);
#endif
					decoder->threads_buffer[i] = NULL;
				}
			}
			decoder->threads_buffer_size = 0;
		}

		// Allocate one scratch buffer per worker thread
		for (i = 0; i < cpus; i++)
		{
			if (decoder->threads_buffer[i] == NULL)
			{
#if _ALLOCATOR
				decoder->threads_buffer[i] = (char *)AllocAligned(decoder->allocator, size, _CACHE_LINE_SIZE);
#else
				decoder->threads_buffer[i] = (char *)MEMORY_ALIGNED_ALLOC(size, _CACHE_LINE_SIZE);
#endif
				if (decoder->threads_buffer[i] == NULL)
				{
					return false;
				}
			}
		}
		decoder->threads_buffer_size = size;
	}

	// Eventually the scratch space descriptor will replace the buffer and buffer_size fields
	return true;
}
| bool ResizeDecoderBuffer(DECODER *decoder, int width, int height, int format) | |
| { | |
| // Check that the dimensions are valid | |
| assert(width > 0); | |
| assert(height > 0); | |
| // Just call the allocation routine | |
| return AllocDecoderBuffer(decoder, width, height, format); | |
| } | |
| void ClearTransformFlags(DECODER *decoder) | |
| { | |
| TRANSFORM **transform_array = decoder->transform; | |
| int channel; | |
| for (channel = 0; channel < TRANSFORM_MAX_CHANNELS; channel++) | |
| { | |
| TRANSFORM *transform = transform_array[channel]; | |
| int index; | |
| if (transform == NULL) break; | |
| for (index = 0; index < TRANSFORM_MAX_WAVELETS; index++) | |
| { | |
| IMAGE *wavelet = transform->wavelet[index]; | |
| if (wavelet != NULL) { | |
| wavelet->band_valid_flags = 0; | |
| wavelet->band_started_flags = 0; | |
| } | |
| } | |
| } | |
| } | |
| // Initialize the tables for decoding the wavelet transforms | |
| void InitWaveletDecoding(DECODER *decoder, int subband_wavelet_index[], int subband_band_index[], int num_subbands) | |
| { | |
| size_t subband_table_size = num_subbands * sizeof(int); | |
| memset(decoder->subband_wavelet_index, 0, sizeof(decoder->subband_wavelet_index)); | |
| memcpy(decoder->subband_wavelet_index, subband_wavelet_index, subband_table_size); | |
| memset(decoder->subband_band_index, 0, sizeof(decoder->subband_band_index)); | |
| memcpy(decoder->subband_band_index, subband_band_index, subband_table_size); | |
| } | |
#if 0
// Disabled: checks whether the decoder can produce the given color format.
static bool IsValidFormat(int format)
{
	bool valid_format = true;

	//TODO: Change this routine into a switch statement

	if(format == COLOR_FORMAT_BYR5)
		return true;	// can decode to BYR5

	if(format == COLOR_FORMAT_BYR4)
		return true;	// can decode to BYR4

	if(format == COLOR_FORMAT_BYR3)
		return true;	// can decode to BYR3

	if(format == COLOR_FORMAT_BYR2)
		return true;	// can decode to BYR2

	if(format == COLOR_FORMAT_RG48)
		return true;	// can decode to RGB48

	if(format == COLOR_FORMAT_RG64)
		return true;	// can decode to RGBA64

	if (format == COLOR_FORMAT_B64A)
	{
		return true;	// Can decode to B64A
	}

	// Otherwise the format must fall within the known decoded range
	if (!(COLOR_FORMAT_UNKNOWN < format && format <= MAX_DECODED_COLOR_FORMAT)) {
		valid_format = false;
	}

	return valid_format;
}
#endif
| #if _INTERLACED_WORKER_THREADS | |
// Create the Win32 synchronization objects and worker threads used for
// interlaced decoding.  Idempotent: lock_init records that the thread
// state has already been created, so repeated calls do nothing.
void StartInterlaceWorkerThreads(DECODER *decoder)
{
	int i;

	if(decoder->interlaced_worker.lock_init == 0)
	{
		// Create events for starting the worker threads
		// (auto-reset, initially unsignaled)
		for(i=0; i<THREADS_IN_LAST_WAVELET; i++)
		{
			decoder->interlaced_worker.start_event[i] = CreateEvent(NULL, false, false, NULL);
		}

		// Create a semaphore to signal the worker threads to process rows
		decoder->interlaced_worker.row_semaphore = CreateSemaphore(NULL, 0, LONG_MAX, NULL);

		// Create an event for each worker thread to signal that it has finished
		for(i=0; i<THREADS_IN_LAST_WAVELET; i++)
		{
			decoder->interlaced_worker.done_event[i] = CreateEvent(NULL, false, false, NULL);
		}

		// Create an event for forcing the worker threads to terminate
		// (manual-reset so all threads see it)
		decoder->interlaced_worker.stop_event = CreateEvent(NULL, true, false, NULL);

		// Zero the count of worker threads that are active
		decoder->interlaced_worker.thread_count = 0;

		// Initialize the lock for controlling access to the worker thread data
		InitializeCriticalSection(&decoder->interlaced_worker.lock);
		decoder->interlaced_worker.lock_init = 1;

		// Launch the worker threads
		for (i = 0; i < THREADS_IN_LAST_WAVELET; i++)
		{
			decoder->interlaced_worker.id[i] = 0;
			decoder->interlaced_worker.handle[i] = CreateThread(NULL, 0, InterlacedWorkerThreadProc, decoder, 0, &decoder->interlaced_worker.id[i]);
			assert(decoder->interlaced_worker.handle[i] != NULL);
		}
	}
}
| #endif | |
#if 0
// Disabled: deliberately performs a division that faults when x == 0,
// used to exercise the exception handler installed by DecodeInit.
// volatile prevents the compiler from folding the division away.
int TestException(int x)
{
	static volatile int y1 = 100;
	volatile int x1 = x;
	return y1 / x1;
}
#endif
| // Process device driver request to initialize the decoder | |
#if _ALLOCATOR
bool DecodeInit(ALLOCATOR *allocator, DECODER *decoder, int width, int height, int format, int resolution, FILE *logfile)
#else
bool DecodeInit(DECODER *decoder, int width, int height, int format, int resolution, FILE *logfile)
#endif
{
	// Initialize a decoder for the given output dimensions, pixel format,
	// and decoded resolution: sets up codebooks, finite state machines,
	// worker threads (unless delayed), the per-channel transforms, and the
	// intermediate decoding buffer.  Returns false on any allocation failure.
	CODESET codesets[CODEC_NUM_CODESETS];
	int i;
	int cpus;
	//int x = 0;

	// Copy the static codeset definitions into a local working array
#if CODEC_NUM_CODESETS == 3
	memcpy(&codesets[0], &CURRENT_CODESET, sizeof(CODESET));
	memcpy(&codesets[1], &SECOND_CODESET, sizeof(CODESET));
	memcpy(&codesets[2], &THIRD_CODESET, sizeof(CODESET));
#elif CODEC_NUM_CODESETS == 2
	memcpy(&codesets[0], &CURRENT_CODESET, sizeof(CODESET));
	memcpy(&codesets[1], &SECOND_CODESET, sizeof(CODESET));
#else
	memcpy(&codesets[0], &CURRENT_CODESET, sizeof(CODESET));
#endif

#ifdef _WIN32
	// Set the handler for system exceptions
	SetDefaultExceptionHandler();
#endif
	//TestException(x);

	// Clear all decoder fields except the logfile and set the codebooks for decoding
	InitDecoder(decoder, logfile, &codesets[0]);

#if _ALLOCATOR
	decoder->allocator = allocator;
#endif

	if(decoder->thread_cntrl.capabilities == 0)
	{
		// Determine the processor capabilities
		SetDecoderCapabilities(decoder);
	}
	// The CPU count is stored in the high 16 bits of the capabilities word
	cpus = decoder->thread_cntrl.capabilities >> 16;
	assert(cpus > 0 && cpus <= _MAX_CPUS);

	// Decode to half resolution?
	if (resolution == DECODED_RESOLUTION_HALF)
	{
		// Reduce the frame size by half in each dimension
		width = width/2;
		height = height/2;
	}
	else if (resolution == DECODED_RESOLUTION_QUARTER)
	{
		// Reduce the frame size by one fourth in each dimension
		width = width/4;
		height = height/4;
	}

	// Initialize the codebooks
#if _ALLOCATOR
	if (!InitCodebooks(decoder->allocator, codesets)) {
		//decoder->error = CODEC_ERROR_INIT_CODEBOOKS;
		// The subroutine has already set the error code
		return false;
	}
#else
	if (!InitCodebooks(codesets)) {
		//decoder->error = CODEC_ERROR_INIT_CODEBOOKS;
		// The subroutine has already set the error code
		return false;
	}
#endif

	// Initialize the FSM
	InitDecoderFSM(decoder, &codesets[0]);

	// Check the frame dimensions and format
	//assert(width > 0);
	//assert(height > 0);
//	assert(IsValidFormat(format));

#if _THREADED_DECODER
	// Create a semaphore to signal the transform thread to begin processing

	// Initialize the transform queue
	decoder->transform_queue.started = 0;
	decoder->transform_queue.num_entries = 0;
	decoder->transform_queue.next_entry = 0;
	decoder->transform_queue.free_entry = 0;
	memset(decoder->transform_queue.queue, 0, sizeof(decoder->transform_queue.queue));
#endif

#if _INTERLACED_WORKER_THREADS && _DELAY_THREAD_START==0
	StartInterlaceWorkerThreads(decoder);
#endif

#if _THREADED
#if !_DELAY_THREAD_START	//start threads now if not _DELAY_THREAD_START
	if(cpus > 1)
	{
		// Entropy decoding uses at most four worker threads
		int threads = cpus;
		if(threads > 4)
			threads = 4;

		CreateLock(&decoder->entropy_worker_new.lock);

		// Initialize the pool of transform worker threads
		ThreadPoolCreate(&decoder->entropy_worker_new.pool,
						threads,
						EntropyWorkerThreadProc,
						decoder);
	}

	// Initialize the lock that controls access to the generic worker thread data
	CreateLock(&decoder->worker_thread.lock);

	// Initialize the pool of transform worker threads
	ThreadPoolCreate(&decoder->worker_thread.pool,
					cpus,
					WorkerThreadProc,
					decoder);
#endif
#endif

	// Set the frame dimensions and format
	SetDecoderFormat(decoder, width, height, format, resolution);

	// Allocate the data structure for decoding the samples
	AllocDecoderGroup(decoder);

	// Note that this code assumes that the samples to decode are groups
	// as opposed to isolated frames which are not supported in this code

	// Allocate a buffer for storing intermediate results during decoding
	if (!AllocDecoderBuffer(decoder, width, height, format)) {
		return false;
	}

	// Should check that the finite state machine tables were initialized
	// NOTE(review): a negative flags value presumably marks an initialized
	// table -- confirm against the FSM table definition
	assert(decoder->fsm[0].table.flags < 0);

	// Initialize the finite state machine for this decoder
	for(i=0; i<CODEC_NUM_CODESETS; i++)
	{
		InitFSM(&decoder->fsm[i], codesets[i].fsm_table);
#if _COMPANDING
		// Scale the values in the finite state machine entries for companding
		ScaleFSM(&decoder->fsm[i].table);
#endif
	}

	// Indicate that the decoder has been initialized
	decoder->state = DECODER_STATE_INITIALIZED;

#if (1 && DUMP)
	// Write the wavelet bands as images
	SetDumpDirectory(CODEC_TYPE(decoder), DUMP_DECODER_DIRECTORY);
	SetDumpFilename(CODEC_TYPE(decoder), DUMP_DEFAULT_FILENAME);
	SetDumpChannelMask(CODEC_TYPE(decoder), 1/*ULONG_MAX*/);
//	SetDumpWaveletMask(CODEC_TYPE(decoder), 7<<4 | 1/*ULONG_MAX*/);
	SetDumpWaveletMask(CODEC_TYPE(decoder), ULONG_MAX);

	// Set this flag to enable output
	decoder->dump.enabled = true;
#endif

#if _TIMING
	// Initialize the global timers and counters
	InitTiming();
#endif

	//DAN20160203 Fix for a memory leak in InitCodebooks: the codebook
	// copies made above are no longer needed once the FSMs are built
	for (i = 0; i < CODEC_NUM_CODESETS; i++)
	{
#if _ALLOCATOR
		Free(allocator, codesets[i].codebook_runbook); codesets[i].codebook_runbook = NULL;
		Free(allocator, codesets[i].fastbook); codesets[i].fastbook = NULL;
		Free(allocator, codesets[i].valuebook); codesets[i].valuebook = NULL;
#else
		MEMORY_FREE(codesets[i].codebook_runbook); codesets[i].codebook_runbook = NULL;
		MEMORY_FREE(codesets[i].fastbook); codesets[i].fastbook = NULL;
		MEMORY_FREE(codesets[i].valuebook); codesets[i].valuebook = NULL;
#endif
	}

	// The decoder has been initialized successfully
	return true;
}
// Ensure the entropy worker thread pool exists before entropy decoding
// begins, applying any user-imposed CPU limit from cfhddata first.
void DecodeEntropyInit(DECODER *decoder)
{
	int cpus = 1;

	if(decoder->thread_cntrl.capabilities == 0)
	{
		// Determine the processor capabilities
		SetDecoderCapabilities(decoder);
	}
	// The CPU count is stored in the high 16 bits of the capabilities word
	cpus = decoder->thread_cntrl.capabilities >> 16;

	// Apply the user-specified CPU limit (if any)
	if (cpus > (int)decoder->cfhddata.cpu_limit && decoder->cfhddata.cpu_limit)
	{
		cpus = decoder->cfhddata.cpu_limit;
		decoder->thread_cntrl.limit = cpus;
		decoder->thread_cntrl.set_thread_params = 1;

		// Rewrite the CPU count in the high 16 bits of the capabilities word
		decoder->thread_cntrl.capabilities &= 0xffff;
		decoder->thread_cntrl.capabilities |= cpus<<16;
	}
	assert(cpus > 0 && cpus <= _MAX_CPUS);

#if _THREADED
#if _DELAY_THREAD_START	//start threads now if not _DELAY_THREAD_START
	// Create the entropy worker thread pool on first use
	if(cpus > 1 && decoder->entropy_worker_new.pool.thread_count == 0)
	{
		// Entropy decoding uses at most four worker threads
		int threads = cpus;
		if(threads > 4)
			threads = 4;

		CreateLock(&decoder->entropy_worker_new.lock);

		// Initialize the pool of transform worker threads
		ThreadPoolCreate(&decoder->entropy_worker_new.pool,
						threads,
						EntropyWorkerThreadProc,
						decoder);
	}
#endif
#endif
}
| bool DecodeOverrides(DECODER *decoder, unsigned char *overrideData, int overrideSize) | |
| { | |
| if(decoder->overrideData) | |
| { | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->overrideData); | |
| #else | |
| MEMORY_FREE(decoder->overrideData); | |
| #endif | |
| decoder->overrideData = NULL; | |
| decoder->overrideSize = 0; | |
| } | |
| if(overrideSize) | |
| { | |
| #if _ALLOCATOR | |
| decoder->overrideData = Alloc(decoder->allocator, overrideSize); | |
| #else | |
| decoder->overrideData = MEMORY_ALLOC(overrideSize); | |
| #endif | |
| if(decoder->overrideData) | |
| { | |
| memcpy(decoder->overrideData, overrideData, overrideSize); | |
| decoder->overrideSize = overrideSize; | |
| } | |
| } | |
| else | |
| { | |
| int i; | |
| for(i=METADATA_PRIORITY_OVERRIDE; i<=METADATA_PRIORITY_MAX; i++) //This was 0 to max but that cause right eye primary corrections(side-by-side) mode to flicker. | |
| // This database cleariing was added but I don't know why. | |
| { | |
| if(decoder->DataBases[i]) | |
| { | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, decoder->DataBases[i]); | |
| #else | |
| MEMORY_FREE(decoder->DataBases[i]); | |
| #endif | |
| decoder->DataBases[i] = NULL; | |
| decoder->DataBasesSize[i] = 0; | |
| decoder->DataBasesAllocSize[i] = 0; | |
| } | |
| } | |
| } | |
| return true; | |
| } | |
| TRANSFORM *AllocGroupTransform(GROUP *group, int channel) | |
| { | |
| #if _ALLOCATOR | |
| //TODO:ALLOC Change this routine to take an allocator as the first argument | |
| ALLOCATOR *allocator = NULL; | |
| #endif | |
| TRANSFORM *transform; | |
| // Channel zero is a special case because it may mean | |
| // that the group header has not been decoded yet | |
| if (channel != 0) | |
| { | |
| // Make sure that the channel number is in range | |
| assert(0 <= channel && channel < group->header.num_channels); | |
| if (!(0 <= channel && channel < group->header.num_channels)) | |
| return NULL; | |
| } | |
| transform = group->transform[channel]; | |
| // Need to allocate a transform data structure? | |
| if (transform == NULL) { | |
| #if _ALLOCATOR | |
| transform = (TRANSFORM *)Alloc(allocator, sizeof(TRANSFORM)); | |
| #else | |
| transform = (TRANSFORM *)MEMORY_ALLOC(sizeof(TRANSFORM)); | |
| #endif | |
| assert(transform != NULL); | |
| if (transform == NULL) return NULL; | |
| memset(transform, 0, sizeof(TRANSFORM)); | |
| group->transform[channel] = transform; | |
| #if _TIMING | |
| alloc_transform_count++; | |
| #endif | |
| } | |
| return transform; | |
| } | |
| //extern FILE *logfile; | |
| void EraseOutputBuffer(uint8_t *buffer, int width, int height, int32_t pitch, int format) | |
| { | |
| size_t size = height * pitch; | |
| union { | |
| uint8_t byte[4]; | |
| uint32_t word; | |
| } output; | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_YUYV: | |
| output.byte[0] = COLOR_LUMA_BLACK; | |
| output.byte[1] = COLOR_CHROMA_ZERO; | |
| output.byte[2] = COLOR_LUMA_BLACK; | |
| output.byte[3] = COLOR_CHROMA_ZERO; | |
| break; | |
| default: | |
| //if (logfile) fprintf(logfile,"**Unknown format: %d\n", format); | |
| //assert(0); | |
| output.word = 0; | |
| break; | |
| } | |
| memset(buffer, output.word, size); | |
| } | |
| // Decode the coefficients in a subband | |
| bool DecodeSampleSubband(DECODER *decoder, BITSTREAM *input, int subband); | |
| // Decode the coefficients in a lowpass band | |
| bool DecodeSampleLowPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet); | |
| // Decode the coefficients in a highpass band | |
| bool DecodeSampleHighPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, int band, int threading); | |
| // Decode an empty band | |
| bool DecodeSampleEmptyBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, int band); | |
| bool DecodeBand16s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, | |
| int band_index, int width, int height); | |
| bool DecodeBand16sLossless(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, | |
| int band_index, int width, int height); | |
| // Decode a sample channel header | |
| bool DecodeSampleChannelHeader(DECODER *decoder, BITSTREAM *input); | |
| // Apply the inverse horizontal-temporal transform to reconstruct the output frame | |
| void ReconstructSampleFrameToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| #if 0 | |
| // Reconstruct the frame to quarter resolution at full frame rate | |
| void ReconstructQuarterFrame(DECODER *decoder, int num_channels, | |
| uint8_t *frame1, uint8_t *frame2, int output_pitch, | |
| FRAME_INFO *info, char *buffer, size_t buffer_size); | |
| #else | |
| // Reconstruct the frame to quarter resolution at full frame rate | |
| void ReconstructQuarterFrame(DECODER *decoder, int num_channels, | |
| int frame_index, uint8_t *output, int output_pitch, | |
| FRAME_INFO *info, const SCRATCH *scratch, int precision); | |
| #endif | |
| // Copy the quarter resolution lowpass channels from the spatial transform | |
| void CopyQuarterFrameToBuffer(TRANSFORM **transform_array, int num_channels, | |
| uint8_t *output, int output_pitch, | |
| FRAME_INFO *info, int precision); | |
| // Convert the quarter resolution lowpass channels to the specified output format | |
| void ConvertQuarterFrameToBuffer(DECODER *decoder, TRANSFORM **transform_array, int num_channels, | |
| uint8_t *output, int output_pitch, | |
| FRAME_INFO *info, int precision); | |
| // Routines for converting the new encoded formats to the requested output format | |
| CODEC_ERROR ReconstructSampleFrameRGB444ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameRGBA4444ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameYUVA4444ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| // The first Bayer routine calls the other Bayer routines for the decoded resolution | |
| CODEC_ERROR ReconstructSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameDeBayerFullToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameBayerFullToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameBayerHalfToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR ReconstructSampleFrameBayerQuarterToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR UncompressedSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR UncompressedSampleFrameYUVToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| CODEC_ERROR UncompressedSampleFrameRGBToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch); | |
| // New code for handling the original YUV 4:2:2 encoded format | |
| CODEC_ERROR ReconstructSampleFrameYUV422ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch); | |
| // Return true if the rest of the channel does not have to be decoded | |
| static bool CanSkipChannel(DECODER *decoder, int resolution) | |
| { | |
| CODEC_STATE *codec = &decoder->codec; | |
| int channel = codec->channel; | |
| TRANSFORM *transform = decoder->transform[channel]; | |
| int transform_type = transform->type; | |
| // Can the rest of the channel be skipped? | |
| if (transform_type == TRANSFORM_TYPE_FIELDPLUS) | |
| { | |
| switch (resolution) | |
| { | |
| case DECODED_RESOLUTION_HALF: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| return ((codec->decoded_subband_flags & DECODED_SUBBAND_MASK_HALF) == DECODED_SUBBAND_MASK_HALF); | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| return ((codec->decoded_subband_flags & DECODED_SUBBAND_MASK_QUARTER) == DECODED_SUBBAND_MASK_QUARTER); | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| return (codec->decoded_subband_flags & 1); | |
| break; | |
| default: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| { | |
| if(decoder->frame.format == DECODED_FORMAT_YUYV || decoder->frame.format == DECODED_FORMAT_UYVY) | |
| { | |
| // If we are requesting a YUV decode we don't need the 4th channel | |
| if(codec->channel == 3) | |
| { | |
| return true; | |
| } | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| const uint32_t decoded_subband_mask_half = 0x7F; | |
| const uint32_t decoded_subband_mask_quarter = 0x0F; | |
| assert(transform_type == TRANSFORM_TYPE_SPATIAL); | |
| switch (resolution) | |
| { | |
| case DECODED_RESOLUTION_HALF: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| return ((codec->decoded_subband_flags & decoded_subband_mask_half) == decoded_subband_mask_half); | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| return ((codec->decoded_subband_flags & decoded_subband_mask_quarter) == decoded_subband_mask_quarter); | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| return (codec->decoded_subband_flags & 1); | |
| break; | |
| default: | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| { | |
| if(decoder->frame.format == DECODED_FORMAT_YUYV || decoder->frame.format == DECODED_FORMAT_UYVY) | |
| { | |
| // If we are requesting a YUV decode we don't need the 4th channel | |
| if(codec->channel == 3) | |
| { | |
| return true; | |
| } | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| // Cannot skip the rest of the channel | |
| return false; | |
| } | |
#if 0
// Disabled: return true if the given subband is not needed at the decoded
// resolution and can therefore be skipped.
static bool CanSkipSubband(DECODER *decoder, int subband)
{
	// Bitmask indicates which subbands must be decoded for quarter resolution
	static uint32_t quarter_resolution_mask = 0x008F;

	// Convert the subband number into a bitmask (could use a lookup table)
	uint32_t subband_mask = SUBBAND_MASK(subband);

	// Select the resolution of the fully decoded frames
	int resolution = decoder->frame.resolution;

	switch (resolution)
	{
	case DECODED_RESOLUTION_QUARTER:
		//if (4 <= subband && subband <= 6)
		if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER)
		{
			if ((subband_mask & quarter_resolution_mask) == 0) {
				return true;
			}
		}
		break;

	default:
		// Assume that the subband must be decoded
		break;
	}

	return false;
}
#endif
| // Return true if the wavelet exists and all bands are valid | |
| static bool AllBandsValid(IMAGE *wavelet) | |
| { | |
| return (wavelet != NULL && BANDS_ALL_VALID(wavelet)); | |
| } | |
| #if DEBUG | |
| static bool AllTransformBandsValid(TRANSFORM *transform_array[], int num_channels, int frame_index) | |
| { | |
| int channel; | |
| if (!(1 <= num_channels && num_channels <= TRANSFORM_MAX_CHANNELS)) { | |
| assert(0); | |
| return false; | |
| } | |
| if (!(0 <= frame_index && frame_index < TRANSFORM_MAX_FRAMES)) { | |
| assert(0); | |
| return false; | |
| } | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| IMAGE *wavelet = transform_array[channel]->wavelet[frame_index]; | |
| if (!AllBandsValid(wavelet)) | |
| { | |
| return false; | |
| } | |
| } | |
| // All wavelet bands in all channels are valid | |
| return true; | |
| } | |
| static bool AllLowpassBandsValid(TRANSFORM *transform_array[], int num_channels, int frame_index) | |
| { | |
| int channel; | |
| if (!(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS)) { | |
| return false; | |
| } | |
| if (!(0 <= frame_index && frame_index < TRANSFORM_MAX_FRAMES)) { | |
| return false; | |
| } | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| IMAGE *wavelet = transform_array[channel]->wavelet[frame_index]; | |
| if (!(wavelet != NULL && wavelet->band_valid_flags & BAND_VALID_MASK(0))) { | |
| return false; | |
| } | |
| } | |
| // All lowpass bands in all channels are valid | |
| return true; | |
| } | |
| #endif | |
| static bool | |
| ComputeFrameDimensionsFromFirstWavelet(int transform_type, | |
| int first_wavelet_width, | |
| int first_wavelet_height, | |
| int *frame_width_out, | |
| int *frame_height_out) | |
| { | |
| int frame_width; | |
| int frame_height; | |
| int expansion = 8; | |
| switch (transform_type) | |
| { | |
| case TRANSFORM_TYPE_SPATIAL: | |
| frame_width = first_wavelet_width * expansion; | |
| frame_height = first_wavelet_height * expansion; | |
| break; | |
| case TRANSFORM_TYPE_FIELDPLUS: | |
| frame_width = first_wavelet_width * expansion; | |
| frame_height = first_wavelet_height * expansion; | |
| break; | |
| default: | |
| assert(0); | |
| return false; | |
| } | |
| // Return the frame dimensions | |
| *frame_width_out = frame_width; | |
| *frame_height_out = frame_height; | |
| return true; | |
| } | |
| // Decode the sample header to determine the type of sample and other parameters | |
| bool ParseSampleHeader(BITSTREAM *input, SAMPLE_HEADER *header) | |
| { | |
| TAGVALUE segment; | |
| int sample_type; | |
| int sample_size = 0; | |
| // Group index | |
| uint32_t channel_size[TRANSFORM_MAX_CHANNELS]; | |
| // Number of channels in the group index | |
| int channel_count; | |
| // Values used for computing the frame width and height (if necessary) | |
| int transform_type = -1; | |
| int first_wavelet_width = 0; | |
| int first_wavelet_height = 0; | |
| int display_height = 0; | |
| int current_channel = 0; | |
| int currentVideoChannel = header->videoChannels; | |
| int find_lowpass_bands = header->find_lowpass_bands & 1; | |
| int find_uncompressed = header->find_lowpass_bands & 2 ? 1 : 0; | |
| int find_header_info_only = header->find_lowpass_bands & 4 ? 1 : 0; | |
| if (header == NULL) { | |
| return false; | |
| } | |
| if(currentVideoChannel == 0) | |
| currentVideoChannel = 1; | |
| // Clear the entire sample header to prevent early return from this routine | |
| memset(header, 0, sizeof(SAMPLE_HEADER)); | |
| // Clear the error code | |
| header->error = CODEC_ERROR_OKAY; | |
| // Initialize the frame dimensions to unknown | |
| header->width = 0; | |
| header->height = 0; | |
| header->videoChannels = 1; | |
| // Initialize the original pixel format to unknown | |
| header->input_format = COLOR_FORMAT_UNKNOWN; | |
| // Initialize the encoded format to unknown | |
| header->encoded_format = ENCODED_FORMAT_UNKNOWN; | |
| // Clear the frame number in case it is not present in the sample | |
| header->frame_number = 0; | |
| // The video is not progressive if the sample flags are not present | |
| header->hdr_progressive = false; | |
| #if _BITSTREAM_UNALIGNED | |
| // Record the alignment of the bitstream within the sample | |
| SetBitstreamAlignment(input, 0); | |
| #endif | |
| sample_size = input->nWordsUsed; | |
| // Get the type of sample (should be the first tag value pair) | |
| segment = GetTagValue(input); | |
| assert(segment.tuple.tag == CODEC_TAG_SAMPLE); | |
| if (!IsValidSegment(input, segment, CODEC_TAG_SAMPLE)) { | |
| header->error = CodecErrorBitstream(input); | |
| return false; | |
| } | |
| sample_type = segment.tuple.value; | |
| switch (sample_type) | |
| { | |
| case SAMPLE_TYPE_GROUP: // Group of frames | |
| header->key_frame = true; | |
| header->difference_frame = false; | |
| header->droppable_frame = false; | |
| break; | |
| case SAMPLE_TYPE_FRAME: // The second or later frame in a group | |
| header->key_frame = false; | |
| header->difference_frame = true; | |
| header->droppable_frame = true; | |
| break; | |
| case SAMPLE_TYPE_IFRAME: // One frame in the group | |
| header->key_frame = true; | |
| header->difference_frame = false; | |
| header->droppable_frame = true; | |
| break; | |
| case SAMPLE_TYPE_SEQUENCE_HEADER: | |
| // Treat the video sequence header like a keyframe that can be dropped | |
| header->key_frame = true; | |
| header->difference_frame = false; | |
| header->droppable_frame = true; | |
| break; | |
| default: | |
| // Unknown type of sample | |
| header->error = CODEC_ERROR_SAMPLE_TYPE; | |
| return false; | |
| break; | |
| } | |
| // Continue parsing the sample header until all of the information has been found | |
| while ( (find_lowpass_bands == 1 && current_channel < 3) || //parse all | |
| (find_uncompressed == 1 && current_channel < 1) || | |
| display_height == 0 || | |
| header->width == 0 || | |
| header->height == 0 || | |
| header->input_format == COLOR_FORMAT_UNKNOWN || | |
| header->frame_number == 0 || | |
| (header->interlaced_flags == 0 && header->hdr_progressive == 0)) | |
| { | |
| int chunksize = 0; | |
| // Get the next tag value pair from the bitstream | |
| segment = GetSegment(input); | |
| // Did the bitstream end before the last tag was found? | |
| if (input->error == BITSTREAM_ERROR_UNDERFLOW) { | |
| break; | |
| } | |
| // Did an error occur while reading the bitstream? | |
| if (input->error != BITSTREAM_ERROR_OKAY) { | |
| header->error = CodecErrorBitstream(input); | |
| return false; | |
| } | |
| // Is this an optional tag? | |
| if (segment.tuple.tag < 0) { | |
| segment.tuple.tag = NEG(segment.tuple.tag); | |
| } | |
| if(segment.tuple.tag & 0x2000) | |
| { | |
| chunksize = segment.tuple.value; | |
| chunksize &= 0xffff; | |
| chunksize += ((segment.tuple.tag&0xff)<<16); | |
| } | |
| else if(segment.tuple.tag & 0x4000) | |
| { | |
| chunksize = segment.tuple.value; | |
| chunksize &= 0xffff; | |
| } | |
| // else if(tag == CODEC_TAG_INDEX) // handled below | |
| // { | |
| // chunksize = value; | |
| // chunksize &= 0xffff; | |
| // } | |
| else | |
| { | |
| chunksize = 0; | |
| } | |
| if((int)(segment.tuple.tag) <= ((int)CODEC_TAG_LAST_NON_SIZED) || segment.tuple.tag & 0x6000) | |
| { | |
| int skip = 1; | |
| if((segment.tuple.tag & 0xff00) == 0x2200) //sample size | |
| { | |
| if(sample_size < chunksize*4) | |
| find_header_info_only = 1; | |
| skip = find_header_info_only; | |
| if(currentVideoChannel <= 1 && header->videoChannels == 2 && !find_header_info_only) | |
| { | |
| BITSTREAM input2; | |
| SAMPLE_HEADER header2; | |
| BITWORD *eye2 = (BITWORD *)(input->lpCurrentWord + chunksize*4); | |
| int eye_offset = sample_size - input->nWordsUsed + chunksize*4; //approx | |
| int eye_sample_size = input->nWordsUsed - eye_offset; | |
| // Search for first sample of the next frame | |
| while((eye2[1] != (uint8_t)CODEC_TAG_SAMPLE || eye2[0] != 0 || eye2[2] != 0) && eye_sample_size > 0) | |
| { | |
| eye2 += 4; | |
| chunksize ++; | |
| eye_offset += 4; | |
| eye_sample_size -= 4; | |
| } | |
| // Save the offset to the right stereo sample | |
| header->left_sample_size = eye_offset; | |
| { | |
| InitBitstreamBuffer(&input2, eye2, eye_sample_size, BITSTREAM_ACCESS_READ); | |
| memset(&header2, 0, sizeof(SAMPLE_HEADER)); | |
| header2.find_lowpass_bands = 1; | |
| currentVideoChannel++; | |
| header2.videoChannels = currentVideoChannel; | |
| if(ParseSampleHeader(&input2, &header2)) | |
| { | |
| int i; | |
| for(i=0;i<4;i++) | |
| { | |
| if(header2.thumbnail_channel_offsets[i]) | |
| header->thumbnail_channel_offsets_2nd_Eye[i] = eye_offset + header2.thumbnail_channel_offsets[i]; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| if((segment.tuple.tag & 0xff00) == 0x2300) //uncompressed sample size | |
| { | |
| header->hdr_uncompressed = 1; | |
| skip = 1; | |
| if(find_lowpass_bands != 1) | |
| break; | |
| } | |
| if((segment.tuple.tag & 0xff00) == 0x2100) //level | |
| { | |
| if(find_lowpass_bands == 1) | |
| { | |
| skip = 0; | |
| } | |
| else | |
| { | |
| skip = 1; // no header data after the fix level | |
| break; | |
| } | |
| } | |
| if(chunksize) | |
| { | |
| if(skip) | |
| { | |
| input->lpCurrentWord += chunksize*4; | |
| input->nWordsUsed -= chunksize*4; | |
| } | |
| } | |
| else | |
| { | |
| switch (segment.tuple.tag) | |
| { | |
| case CODEC_TAG_VERSION: // Version number of the encoder used in each GOP. | |
| header->encoder_version = (((segment.tuple.value>>12) & 0xf)<<16) | | |
| (((segment.tuple.value>>8) & 0xf)<<8) | | |
| ((segment.tuple.value) & 0xff); | |
| break; | |
| case CODEC_TAG_INDEX: | |
| // Get the number of channels in the index to skip | |
| channel_count = segment.tuple.value; | |
| DecodeGroupIndex(input, (uint32_t *)&channel_size[0], channel_count); | |
| break; | |
| case CODEC_TAG_FRAME_WIDTH: | |
| // Record the frame width in the sample header | |
| header->width = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_FRAME_HEIGHT: | |
| // Record the frame height in the sample header | |
| header->height = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_FRAME_DISPLAY_HEIGHT: | |
| display_height = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_LOWPASS_WIDTH: | |
| // Save the width of the smallest wavelet for computing the frame dimensions | |
| first_wavelet_width = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_LOWPASS_HEIGHT: | |
| // Save the height of the smallest wavelet for computing the frame dimensions | |
| first_wavelet_height = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_TRANSFORM_TYPE: | |
| // Save the type of transform for computing the frame dimensions (if necessary) | |
| transform_type = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_INPUT_FORMAT: | |
| // Record the original format of the encoded frames | |
| header->input_format = (COLOR_FORMAT)segment.tuple.value; | |
| break; | |
| case CODEC_TAG_ENCODED_FORMAT: | |
| case CODEC_TAG_OLD_ENCODED_FORMAT: | |
| // Record the encoded format (internal representation) | |
| header->encoded_format = (ENCODED_FORMAT)segment.tuple.value; | |
| if(header->encoded_format == ENCODED_FORMAT_RGBA_4444 && channel_count == 3) | |
| header->encoded_format = ENCODED_FORMAT_RGB_444; | |
| break; | |
| case CODEC_TAG_FRAME_NUMBER: | |
| // Record the frame number for debugging | |
| header->frame_number = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_INTERLACED_FLAGS: | |
| // Record the flags that indicate the field type | |
| header->interlaced_flags = segment.tuple.value; | |
| break; | |
| case CODEC_TAG_SAMPLE_FLAGS: | |
| // The sample flags specify progressive versus interlaced decoding | |
| header->hdr_progressive = !!(segment.tuple.value & SAMPLE_FLAGS_PROGRESSIVE); | |
| if (header->hdr_progressive) { | |
| // Clear the interlaced flags | |
| header->interlaced_flags = 0; | |
| } | |
| break; | |
| case CODEC_TAG_LOWPASS_SUBBAND: | |
| if(segment.tuple.value == 0) // low pass band | |
| { | |
| int count = 8; | |
| uint32_t *lptr = (uint32_t *)input->lpCurrentWord; | |
| do | |
| { | |
| uint32_t longword = SwapInt32(lptr[count]); | |
| unsigned short t,v; | |
| t = (longword>>16) & 0xffff; | |
| v = (longword) & 0xffff; | |
| if (t == CODEC_TAG_MARKER && IsLowPassBandMarker(v) && current_channel < 4) | |
| { | |
| header->thumbnail_channel_offsets[current_channel] = (sample_size - input->nWordsUsed) + count*4 + 4; | |
| break; | |
| } | |
| count++; | |
| } while(count < 32); | |
| current_channel++; | |
| } | |
| break; | |
| case CODEC_TAG_ENCODED_CHANNELS: | |
| if(header->videoChannels == 1) | |
| { | |
| header->videoChannels = segment.tuple.value; | |
| if(header->videoChannels < 1) | |
| header->videoChannels = 1; | |
| } | |
| break; | |
| case CODEC_TAG_QUALITY_L: // | |
| header->encode_quality &= 0xffff0000; | |
| header->encode_quality |= segment.tuple.value; | |
| break; | |
| case CODEC_TAG_QUALITY_H: // | |
| header->encode_quality &= 0xffff; | |
| header->encode_quality |= segment.tuple.value<<16; | |
| break; | |
| } | |
| // Have the encoded frame dimensions been computed? | |
| if (header->width == 0 || header->height == 0) | |
| { | |
| // Found the first wavelet in the bitstream? | |
| if (transform_type >= 0 && first_wavelet_width > 0 && first_wavelet_height > 0) | |
| { | |
| // The group header did not contain tags for the frame dimensions | |
| // prior to the release of support for RGB 4:4:4, so must attempt to | |
| // compute the frame dimensions from the dimensions of the lowpass band. | |
| int frame_width = 0; | |
| int frame_height = 0; | |
| // Use the dimensions of the first wavelet to compute the frame width and height | |
| if (!ComputeFrameDimensionsFromFirstWavelet(transform_type, | |
| first_wavelet_width, | |
| first_wavelet_height, | |
| &frame_width, | |
| &frame_height)) { | |
| // Could not compute the frame dimensions | |
| header->error = CODEC_ERROR_FRAME_DIMENSIONS; | |
| return false; | |
| } | |
| // Save the frame dimensions in the sample header | |
| header->width = frame_width; | |
| header->height = frame_height; | |
| // No more header information after finding the lowpass band | |
| break; | |
| } | |
| } | |
| if(find_lowpass_bands != 1 && find_uncompressed != 1) | |
| { | |
| // No more header information after the first encoded band | |
| if (segment.tuple.tag == CODEC_TAG_BAND_NUMBER) | |
| { | |
| // Stop looking for header information | |
| break; | |
| } | |
| // No more header information after the frame index | |
| if (segment.tuple.tag == CODEC_TAG_FRAME_INDEX) | |
| { | |
| // Stop looking for header information | |
| break; | |
| } | |
| // No more header information after the lowpass band header | |
| if (segment.tuple.tag == CODEC_TAG_PIXEL_DEPTH) | |
| { | |
| // Stop looking for header information | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| if (header->width == 0 || header->height == 0) { | |
| assert(0); | |
| } | |
| // Fill in the encoded format if it was not present in the header | |
| if (header->encoded_format == ENCODED_FORMAT_UNKNOWN) { | |
| header->encoded_format = GetEncodedFormat(header->input_format, header->encode_quality, channel_count); | |
| } | |
| if (display_height > 0) { | |
| header->height = display_height; | |
| } | |
| if (header->encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| header->width *= 2; | |
| header->height *= 2; | |
| if(display_height == 0) | |
| { | |
| if(header->height == 1088) | |
| header->height = 1080; | |
| } | |
| } | |
| // Return true if the header was parsed completely and correctly | |
| return (header->width > 0 && | |
| header->height > 0 && | |
| ((sample_type == SAMPLE_TYPE_FRAME) || | |
| (header->input_format != COLOR_FORMAT_UNKNOWN && | |
| header->encoded_format != ENCODED_FORMAT_UNKNOWN))); | |
| // It is not an error if the frame number was not found in the sample header | |
| } | |
| bool DumpSampleHeader(BITSTREAM *input, FILE *logfile) | |
| { | |
| TAGVALUE segment; | |
| int lowpass_width = 0; | |
| int lowpass_height = 0; | |
| // Parse the sample header until the lowpass band is found | |
| while (lowpass_width == 0 && lowpass_height == 0) | |
| { | |
| // Get the next tag value pair from the bitstream | |
| segment = GetSegment(input); | |
| // Did an error occur while reading the bitstream? | |
| if (input->error != BITSTREAM_ERROR_OKAY) { | |
| return false; | |
| } | |
| // Is this an optional tag? | |
| if (segment.tuple.tag < 0) { | |
| segment.tuple.tag = NEG(segment.tuple.tag); | |
| } | |
| // Check that the tag is valid | |
| assert(CODEC_TAG_ZERO < segment.tuple.tag && segment.tuple.tag <= CODEC_TAG_LAST_NON_SIZED); | |
| switch (segment.tuple.tag) | |
| { | |
| case CODEC_TAG_SAMPLE: | |
| fprintf(logfile, "Sample type: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_FRAME_WIDTH: | |
| fprintf(logfile, "Frame width: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_FRAME_HEIGHT: | |
| fprintf(logfile, "Frame height: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_LOWPASS_WIDTH: | |
| lowpass_width = segment.tuple.value; | |
| fprintf(logfile, "Lowpass width: %d\n", lowpass_width); | |
| break; | |
| case CODEC_TAG_LOWPASS_HEIGHT: | |
| lowpass_height = segment.tuple.value; | |
| fprintf(logfile, "Lowpass height: %d\n", lowpass_height); | |
| break; | |
| case CODEC_TAG_TRANSFORM_TYPE: | |
| fprintf(logfile, "Transform type: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_INPUT_FORMAT: | |
| fprintf(logfile, "Input format: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_ENCODED_FORMAT: | |
| case CODEC_TAG_OLD_ENCODED_FORMAT: | |
| fprintf(logfile, "Encoded format: %d\n", segment.tuple.value); | |
| break; | |
| case CODEC_TAG_FRAME_NUMBER: | |
| fprintf(logfile, "Frame number: %d\n", segment.tuple.value); | |
| break; | |
| } | |
| } | |
| return true; | |
| } | |
/*!
 * @brief Skip past the first (left eye) video channel of a stereo sample. // 3D work
 *
 * Scans the start of the sample for the ENCODED_CHANNELS tuplet; when the
 * sample is multi-channel and the caller asked for a later channel, the
 * bitstream position is advanced past the first channel's chunk to the
 * start of the next sample.  Returns the number of encoded channels found
 * (or the previously recorded count for tiny P-frame samples).
 */
int SkipVideoChannel(DECODER *decoder, BITSTREAM *input, int skip_to_channel) // 3D work
{
	TAGWORD tag,value=1;
	unsigned char *pos = NULL;
	int readsize = input->nWordsUsed;

	if(readsize > 4096) // only need to scan the first few tuplets
	{
		readsize = 4096;
	}
	else
	{
		//Tiny therefore P-frame, nothing to be read so:
		value=decoder->real_channels; // return the last value.
		return value;
	}

	// Locate the ENCODED_CHANNELS tuplet near the start of the sample;
	// value receives the channel count if found
	pos = GetTupletAddr(input->lpCurrentBuffer, readsize, CODEC_TAG_ENCODED_CHANNELS, &value);
	if(pos && value>1 && skip_to_channel>1)
	{
		int chunksize = 0;
		intptr_t offset;
		int count = 0;

		// Read big-endian 16-bit tag/value pairs forward until the
		// sample-size tuplet is found (give up after 10 tuplets)
		do
		{
			tag = *pos++<<8;
			tag |= *pos++;
			value = *pos++<<8;
			value |= *pos++;
			// Optional tags are stored negated
			if (tag < 0)
			{
				tag = NEG(tag);
			}
		} while((tag & 0xff00) != CODEC_TAG_SAMPLE_SIZE && count++ < 10);

		if((tag & 0xff00) == CODEC_TAG_SAMPLE_SIZE)
		{
			// The chunk size is 24 bits: the low byte of the tag holds the
			// high 8 bits, the value holds the low 16 bits (units of words)
			chunksize = value;
			chunksize &= 0xffff;
			chunksize += ((tag&0xff)<<16);

			// Advance the bitstream just past the first channel's chunk
			offset = ((intptr_t)pos - (intptr_t)input->lpCurrentWord) + chunksize*4;
			input->lpCurrentWord += offset;
			input->nWordsUsed -= (int)offset;
			{
				// NOTE: this byte pointer shadows the TAGWORD 'tag' above
				uint8_t *tag = (uint8_t *)input->lpCurrentWord;
				// Search for first sample of the next frame
				while((tag[1] != (uint8_t)CODEC_TAG_SAMPLE || tag[0] != 0 || tag[2] != 0) && input->nWordsUsed > 0)
				{
					input->lpCurrentWord += 4;
					input->nWordsUsed -= 4;
					tag += 4;
				}
			}
		}
	}
	//if(value == 0) value = 1; // old non-stereo file
	return value;
}
// Number of subpixel phases used by the interpolation tables below
#define SUBPIXEL 64

// 4-tap subpixel interpolation kernel with SUBPIXEL+1 phase rows.
// Each row holds the four tap weights (scaled by 128) for one 1/64th-pixel
// phase; 0x7fff stands in where the full-scale weight 256*128 = 32768
// would overflow a signed short.
// NOTE(review): not referenced by the code visible in this chunk -- the
// shift/zoom routines below use the lanczos[] table and declare a local
// gains[4] that shadows this one; confirm this table is still used elsewhere.
static short gains[SUBPIXEL+1][4] = {
	{0*128,0*128,0x7fff,0*128},
	{0*128,2*128,0x7fff,-2*128},
	{0*128,5*128,255*128,-4*128},
	{0*128,8*128,254*128,-6*128},
	{0*128,11*128,253*128,-8*128},
	{0*128,14*128,252*128,-10*128},
	{0*128,18*128,250*128,-12*128},
	{0*128,21*128,248*128,-13*128},
	{-1*128,25*128,247*128,-15*128},
	{-1*128,29*128,244*128,-16*128},
	{-1*128,33*128,241*128,-17*128},
	{-2*128,37*128,239*128,-18*128},
	{-2*128,41*128,236*128,-19*128},
	{-3*128,46*128,233*128,-20*128},
	{-3*128,50*128,229*128,-20*128},
	{-4*128,55*128,226*128,-21*128},
	{-4*128,60*128,221*128,-21*128},
	{-5*128,65*128,217*128,-21*128},
	{-5*128,70*128,213*128,-22*128},
	{-6*128,75*128,209*128,-22*128},
	{-7*128,80*128,205*128,-22*128},
	{-7*128,85*128,199*128,-21*128},
	{-8*128,91*128,194*128,-21*128},
	{-9*128,96*128,190*128,-21*128},
	{-10*128,102*128,185*128,-21*128},
	{-10*128,107*128,179*128,-20*128},
	{-11*128,113*128,174*128,-20*128},
	{-12*128,118*128,169*128,-19*128},
	{-13*128,124*128,164*128,-19*128},
	{-14*128,129*128,159*128,-18*128},
	{-14*128,135*128,152*128,-17*128},
	{-15*128,141*128,147*128,-17*128},
	{-16*128,144*128,144*128,-16*128},
	{-17*128,147*128,141*128,-15*128},
	{-17*128,152*128,135*128,-14*128},
	{-18*128,159*128,129*128,-14*128},
	{-19*128,164*128,124*128,-13*128},
	{-19*128,169*128,118*128,-12*128},
	{-20*128,174*128,113*128,-11*128},
	{-20*128,179*128,107*128,-10*128},
	{-21*128,185*128,102*128,-10*128},
	{-21*128,190*128,96*128,-9*128},
	{-21*128,194*128,91*128,-8*128},
	{-21*128,199*128,85*128,-7*128},
	{-22*128,205*128,80*128,-7*128},
	{-22*128,209*128,75*128,-6*128},
	{-22*128,213*128,70*128,-5*128},
	{-21*128,217*128,65*128,-5*128},
	{-21*128,221*128,60*128,-4*128},
	{-21*128,226*128,55*128,-4*128},
	{-20*128,229*128,50*128,-3*128},
	{-20*128,233*128,46*128,-3*128},
	{-19*128,236*128,41*128,-2*128},
	{-18*128,239*128,37*128,-2*128},
	{-17*128,241*128,33*128,-1*128},
	{-16*128,244*128,29*128,-1*128},
	{-15*128,247*128,25*128,-1*128},
	{-13*128,248*128,21*128,0*128},
	{-12*128,250*128,18*128,0*128},
	{-10*128,252*128,14*128,0*128},
	{-8*128,253*128,11*128,0*128},
	{-6*128,254*128,8*128,0*128},
	{-4*128,255*128,5*128,0*128},
	{-2*128,0x7fff,2*128,0*128},
	{0*128,0*128,0x7fff,0*128}
};
// 256-entry Lanczos (windowed sinc) resampling kernel in signed Q15 fixed
// point.  The shift/zoom routines below index it as lanczos[phase + tap*64]
// (4 taps x 64 subpixel phases); the peak is at entry 128.
static int lanczos[256] =
{
	0,
	-2,
	-8,
	-18,
	-33,
	-53,
	-77,
	-106,
	-141,
	-179,
	-223,
	-272,
	-325,
	-384,
	-447,
	-514,
	-586,
	-662,
	-742,
	-826,
	-913,
	-1004,
	-1097,
	-1193,
	-1290,
	-1389,
	-1490,
	-1591,
	-1692,
	-1792,
	-1892,
	-1990,
	-2086,
	-2179,
	-2269,
	-2355,
	-2436,
	-2511,
	-2580,
	-2643,
	-2697,
	-2744,
	-2781,
	-2809,
	-2826,
	-2832,
	-2826,
	-2808,
	-2776,
	-2730,
	-2670,
	-2594,
	-2503,
	-2395,
	-2271,
	-2129,
	-1969,
	-1790,
	-1593,
	-1377,
	-1141,
	-886,
	-611,
	-315,
	0,
	336,
	692,
	1069,
	1466,
	1884,
	2321,
	2778,
	3255,
	3750,
	4265,
	4797,
	5347,
	5914,
	6498,
	7097,
	7711,
	8340,
	8982,
	9636,
	10301,
	10977,
	11663,
	12357,
	13058,
	13765,
	14477,
	15192,
	15910,
	16630,
	17349,
	18066,
	18781, // NOTE(review): mirror position (entry 160) holds 18159 -- the table is slightly asymmetric here; verify which value is intended
	18871,
	19580,
	20285,
	20986,
	21678,
	22361,
	23035,
	23697,
	24348,
	24983,
	25604,
	26206,
	26790,
	27354,
	27898,
	28419,
	28915,
	29387,
	29832,
	30249,
	30638,
	30997,
	31326,
	31623,
	31886,
	32117,
	32314,
	32476,
	32603,
	32695,
	32749,
	32767, //was 32768, issue for SSE2
	32749,
	32695,
	32603,
	32476,
	32314,
	32117,
	31886,
	31623,
	31326,
	30997,
	30638,
	30249,
	29832,
	29387,
	28915,
	28419,
	27898,
	27354,
	26790,
	26206,
	25604,
	24983,
	24348,
	23697,
	23035,
	22361,
	21678,
	20986,
	20285,
	19580,
	18871,
	18159, // NOTE(review): mirror position (entry 96) holds 18781 -- see note above
	18066,
	17349,
	16630,
	15910,
	15192,
	14477,
	13765,
	13058,
	12357,
	11663,
	10977,
	10301,
	9636,
	8982,
	8340,
	7711,
	7097,
	6498,
	5914,
	5347,
	4797,
	4265,
	3750,
	3255,
	2778,
	2321,
	1884,
	1466,
	1069,
	692,
	336,
	0,
	-315,
	-611,
	-886,
	-1141,
	-1377,
	-1593,
	-1790,
	-1969,
	-2129,
	-2271,
	-2395,
	-2503,
	-2594,
	-2670,
	-2730,
	-2776,
	-2808,
	-2826,
	-2832,
	-2826,
	-2809,
	-2781,
	-2744,
	-2697,
	-2643,
	-2580,
	-2511,
	-2436,
	-2355,
	-2269,
	-2179,
	-2086,
	-1990,
	-1892,
	-1792,
	-1692,
	-1591,
	-1490,
	-1389,
	-1290,
	-1193,
	-1097,
	-1004,
	-913,
	-826,
	-742,
	-662,
	-586,
	-514,
	-447,
	-384,
	-325,
	-272,
	-223,
	-179,
	-141,
	-106,
	-77,
	-53,
	-33,
	-18,
	-8,
	-2,
};
| void RGB48VerticalShiftZoom(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, | |
| int widthbytes, int height, int pitch, float offset, | |
| float zoom) | |
| { | |
| float yposf,ystepf; | |
| int x; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline[4]; | |
| //int spitch = pitch/2; | |
| int neg = 0,step; | |
| __m128i lA,lB,lC,lD,gA,gB,gC,gD,o128,t1; | |
| __m128i *lineA, *lineB, *lineC, *lineD, *outline128; | |
| offset = -offset; | |
| yposf = height * offset; | |
| yposf = (float)height*(0.5f - 1.0f/(2.0f*zoom) - offset); | |
| ystepf = 1.0f/zoom; | |
| if(yposf < 0.0) | |
| neg = 1; | |
| if(pitch < 0) | |
| yposf -= ystepf; | |
| /* yposi = floor(yposf); | |
| remainf = yposf - (float)yposi; | |
| tablepos = (remainf*(float)SUBPIXEL); | |
| yposi = abs(yposi); | |
| if(yposi==0 && tablepos == 0) | |
| return; // no move required | |
| */ | |
| // -3 , 0 best small notch at zero? | |
| // | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_YUYV: | |
| step = 16; | |
| break; | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_WP13: | |
| case DECODED_FORMAT_RG48: | |
| default: | |
| step = 32; | |
| break; | |
| } | |
| { | |
| static char zeroline[1024] = {0}; | |
| int y,yoffset = ((int)(yposf-2.0)),yend = ((int)(yposf+2.0+ystepf*height)); | |
| unsigned char *src = (unsigned char *)RGB48; | |
| unsigned char *dst = (unsigned char *)RGB48; | |
| unsigned char *ptr = (unsigned char *)buffer; | |
| if(yoffset < 0) yoffset = 0; | |
| if(yend > height) yend = height; | |
| src += pitch * yoffset; | |
| for(y=yoffset; y<yend; y++) | |
| { | |
| memcpy(ptr, src, widthbytes); | |
| ptr += widthbytes; | |
| src += pitch; | |
| } | |
| ptr = (unsigned char *)buffer; | |
| for(y=0;y<height; y++) | |
| { | |
| int i,t,yp = ((int)yposf); | |
| int rmdr = 63-((int)(yposf*64.0) & 63); | |
| int gains[4]; | |
| yp -= 1; // use -2 cause a image down shift //DAN20100225 | |
| t = 0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(yp<0 || yp>= height) // skip 0 line as the top line was zagged | |
| { | |
| t += gains[i] = lanczos[rmdr]; | |
| scanline[i] = (unsigned short *)zeroline; | |
| } | |
| else | |
| { | |
| t += gains[i] = lanczos[rmdr]; | |
| scanline[i] = (unsigned short *)&ptr[widthbytes*(yp-yoffset)]; | |
| } | |
| yp++; | |
| rmdr+=64; | |
| } | |
| if(t) | |
| { | |
| __m128i half; | |
| gA = _mm_set1_epi16(gains[0]); | |
| gB = _mm_set1_epi16(gains[1]); | |
| gC = _mm_set1_epi16(gains[2]); | |
| gD = _mm_set1_epi16(gains[3]); | |
| outline128 = (__m128i *)dst; | |
| lineA = (__m128i *)scanline[0]; | |
| lineB = (__m128i *)scanline[1]; | |
| lineC = (__m128i *)scanline[2]; | |
| lineD = (__m128i *)scanline[3]; | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_WP13: | |
| for(x=0;x<widthbytes; x+=step) | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lB = _mm_loadu_si128(lineB++); | |
| lC = _mm_loadu_si128(lineC++); | |
| lD = _mm_loadu_si128(lineD++); | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| _mm_storeu_si128(outline128++, o128); | |
| lA = _mm_loadu_si128(lineA++); | |
| lB = _mm_loadu_si128(lineB++); | |
| lC = _mm_loadu_si128(lineC++); | |
| lD = _mm_loadu_si128(lineD++); | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| break; | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_RG48: | |
| for(x=0;x<widthbytes; x+=step) | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| _mm_storeu_si128(outline128++, o128); | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_YUYV: | |
| for(x=0;x<widthbytes; x+=step) | |
| { | |
| lA = _mm_loadu_si128(lineA); | |
| lA = _mm_unpackhi_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128(lineB); | |
| lB = _mm_unpackhi_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128(lineC); | |
| lC = _mm_unpackhi_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128(lineD); | |
| lD = _mm_unpackhi_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| half = o128; | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_unpacklo_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_unpacklo_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_unpacklo_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_unpacklo_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| half = _mm_srli_epi16(half,8); | |
| o128 = _mm_srli_epi16(o128,8); | |
| o128 = _mm_packus_epi16(o128, half); | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_YUYV) | |
| { | |
| memset(dst, 0x10801080, widthbytes); | |
| } | |
| else | |
| { | |
| memset(dst, 0, widthbytes); | |
| } | |
| } | |
| yposf += ystepf; | |
| dst += pitch; | |
| } | |
| /*ptr = (unsigned char *)buffer; | |
| for(y=0;y<height; y++) | |
| { | |
| int r,g,b,yp = ((int)yposf); | |
| yposf += ystepf; | |
| if(yp<0 || yp>= height) | |
| { | |
| memset(dst, 0, widthbytes); | |
| } | |
| else | |
| { | |
| memcpy(dst, &ptr[widthbytes*yp], widthbytes); | |
| } | |
| dst += pitch; | |
| }*/ | |
| } | |
| } | |
| void RGB48VerticalShiftZoomFine(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, | |
| int widthbytes, int height, int pitch, float offset, | |
| float zoom, int xx) | |
| { | |
| float yposf,ystepf; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline[4]; | |
| //int spitch = pitch/2; | |
| int neg = 0,step; | |
| __m128i lA,lB,lC,lD,gA,gB,gC,gD,o128,t1; | |
| uint8_t *lineAPos, *lineBPos, *lineCPos, *lineDPos; | |
| uint8_t *outlinePos8; | |
| uint16_t *outlinePos16; | |
| offset = -offset; | |
| //yposf = height * offset; | |
| yposf = (float)height*(0.5f - 1.0f/(2.0f*zoom) - offset); | |
| ystepf = 1.0f/zoom; | |
| if(yposf < 0.0) | |
| neg = 1; | |
| if(pitch < 0) | |
| yposf -= ystepf; | |
| /* yposi = floor(yposf); | |
| remainf = yposf - (float)yposi; | |
| tablepos = (remainf*(float)SUBPIXEL); | |
| yposi = abs(yposi); | |
| if(yposi==0 && tablepos == 0) | |
| return; // no move required | |
| */ | |
| // -3 , 0 best small notch at zero? | |
| // | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| step = 4; | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| step = 3; | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| step = 4; | |
| break; | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_RG64: | |
| step = 8; | |
| break; | |
| case DECODED_FORMAT_WP13: | |
| case DECODED_FORMAT_RG48: | |
| step = 6; | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| { | |
| static char zeroline[1024] = {0}; | |
| int y,yoffset = ((int)(yposf-2.0)),yend = ((int)(yposf+2.0+ystepf*height)); | |
| unsigned char *src = (unsigned char *)RGB48; | |
| unsigned char *dst = (unsigned char *)RGB48; | |
| unsigned char *ptr = (unsigned char *)buffer; | |
| if(yoffset < 0) yoffset = 0; | |
| if(yend > height) yend = height; | |
| src += pitch * yoffset; | |
| for(y=yoffset; y<yend; y++) | |
| { | |
| memcpy(ptr, src, widthbytes); | |
| ptr += widthbytes; | |
| src += pitch; | |
| } | |
| ptr = (unsigned char *)buffer; | |
| for(y=0;y<height; y++) | |
| { | |
| int i,t,yp = ((int)yposf); | |
| int rmdr = 63-((int)(yposf*64.0) & 63); | |
| int gains[4]; | |
| yp -= 1; // use -2 cause a image down shift //DAN20100225 | |
| t = 0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(yp<0 || yp>= height) // skip 0 line as the top line was zagged | |
| { | |
| t += gains[i] = lanczos[rmdr]; | |
| scanline[i] = (unsigned short *)zeroline; | |
| } | |
| else | |
| { | |
| t += gains[i] = lanczos[rmdr]; | |
| scanline[i] = (unsigned short *)&ptr[widthbytes*(yp-yoffset)]; | |
| } | |
| yp++; | |
| rmdr+=64; | |
| } | |
| if(t) | |
| { | |
| gA = _mm_set1_epi16(gains[0]); | |
| gB = _mm_set1_epi16(gains[1]); | |
| gC = _mm_set1_epi16(gains[2]); | |
| gD = _mm_set1_epi16(gains[3]); | |
| outlinePos8 = (uint8_t *)dst; | |
| outlinePos16 = (uint16_t *)dst; | |
| lineAPos = (uint8_t *)scanline[0]; | |
| lineBPos = (uint8_t *)scanline[1]; | |
| lineCPos = (uint8_t *)scanline[2]; | |
| lineDPos = (uint8_t *)scanline[3]; | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_W13A: | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=8; | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=8; | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=8; | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=8; | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| //_mm_storeu_si128((__m128i *)outlinePos, o128); | |
| outlinePos16[0] = _mm_extract_epi16(o128, 0); | |
| outlinePos16[1] = _mm_extract_epi16(o128, 1); | |
| outlinePos16[2] = _mm_extract_epi16(o128, 2); | |
| outlinePos16[3] = _mm_extract_epi16(o128, 3); | |
| outlinePos16+=4; | |
| break; | |
| case DECODED_FORMAT_WP13: | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=6; | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=6; | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=6; | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=6; | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| //_mm_storeu_si128((__m128i *)outlinePos, o128); | |
| outlinePos16[0] = _mm_extract_epi16(o128, 0); | |
| outlinePos16[1] = _mm_extract_epi16(o128, 1); | |
| outlinePos16[2] = _mm_extract_epi16(o128, 2); | |
| outlinePos16+=3; | |
| break; | |
| case DECODED_FORMAT_RG64: | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=8; | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=8; | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=8; | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=8; | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| //_mm_storeu_si128((__m128i *)outlinePos, o128); | |
| outlinePos16[0] = _mm_extract_epi16(o128, 0); | |
| outlinePos16[1] = _mm_extract_epi16(o128, 1); | |
| outlinePos16[2] = _mm_extract_epi16(o128, 2); | |
| outlinePos16[3] = _mm_extract_epi16(o128, 3); | |
| outlinePos16+=4; | |
| break; | |
| case DECODED_FORMAT_RG48: | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=6; | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=6; | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=6; | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=6; | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| //_mm_storeu_si128((__m128i *)outlinePos, o128); | |
| outlinePos16[0] = _mm_extract_epi16(o128, 0); | |
| outlinePos16[1] = _mm_extract_epi16(o128, 1); | |
| outlinePos16[2] = _mm_extract_epi16(o128, 2); | |
| outlinePos16+=3; | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_YUYV: | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=4; | |
| lA = _mm_unpackhi_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=4; | |
| lB = _mm_unpackhi_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=4; | |
| lC = _mm_unpackhi_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=4; | |
| lD = _mm_unpackhi_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_srli_epi16(o128,4); | |
| outlinePos8[0] = _mm_extract_epi16(o128, 0); | |
| outlinePos8[1] = _mm_extract_epi16(o128, 1); | |
| outlinePos8[2] = _mm_extract_epi16(o128, 2); | |
| outlinePos8[3] = _mm_extract_epi16(o128, 3); | |
| outlinePos8+=4; | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| { | |
| int r,g,b; | |
| b = ((lineAPos[0] * gains[0])>>7) + | |
| ((lineBPos[0] * gains[1])>>7) + | |
| ((lineCPos[0] * gains[2])>>7) + | |
| ((lineDPos[0] * gains[3])>>7); //16-bit | |
| g = ((lineAPos[1] * gains[0])>>7) + | |
| ((lineBPos[1] * gains[1])>>7) + | |
| ((lineCPos[1] * gains[2])>>7) + | |
| ((lineDPos[1] * gains[3])>>7); //16-bit | |
| r = ((lineAPos[2] * gains[0])>>7) + | |
| ((lineBPos[2] * gains[1])>>7) + | |
| ((lineCPos[2] * gains[2])>>7) + | |
| ((lineDPos[2] * gains[3])>>7); //16-bit | |
| if(r<0) r = 0; if(r>65535) r = 65535; | |
| if(g<0) g = 0; if(g>65535) g = 65535; | |
| if(b<0) b = 0; if(b>65535) b = 65535; | |
| lineAPos+=3; | |
| lineBPos+=3; | |
| lineCPos+=3; | |
| lineDPos+=3; | |
| outlinePos8[0] = b >> 8; //b | |
| outlinePos8[1] = g >> 8; //g | |
| outlinePos8[2] = r >> 8; //r | |
| outlinePos8+=3; | |
| /* SSE2 can't load byte alligned | |
| lA = _mm_loadu_si128((__m128i *)lineAPos); lineAPos+=3; | |
| lA = _mm_unpackhi_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128((__m128i *)lineBPos); lineBPos+=3; | |
| lB = _mm_unpackhi_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128((__m128i *)lineCPos); lineCPos+=3; | |
| lC = _mm_unpackhi_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128((__m128i *)lineDPos); lineDPos+=3; | |
| lD = _mm_unpackhi_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_srli_epi16(o128,4); | |
| outlinePos8[0] = _mm_extract_epi16(o128, 0); //b | |
| outlinePos8[1] = _mm_extract_epi16(o128, 1); //g | |
| outlinePos8[2] = _mm_extract_epi16(o128, 2); //r | |
| outlinePos8+=3; | |
| */ | |
| } | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_YUYV) | |
| { | |
| memset(dst, 0x10801080, widthbytes); | |
| } | |
| else | |
| { | |
| memset(dst, 0, widthbytes); | |
| } | |
| } | |
| yposf += ystepf; | |
| dst += pitch; | |
| } | |
| } | |
| } | |
| void RGB48VerticalShift(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, | |
| int widthbytes, int height, int pitch, float offset) | |
| { | |
| float yposf,remainf; | |
| int yposi,tablepos,x,y; | |
| int gainA,gainB,gainC,gainD; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline[4], *tline; | |
| int spitch = pitch/2; | |
| int neg = 0,shift = 0,skip,step; | |
| int origwidthbytes = widthbytes; | |
| int origwidthextra; | |
| __m128i lA, lB, lC, lD, gA, gB, gC, gD, o128, t1; | |
| __m128i *lineA, *lineB, *lineC, *lineD, *outline128; | |
| // offset = -offset; | |
| if(offset < 0.0) | |
| neg = 1; | |
| yposf = height * offset; | |
| yposi = (int)floor(yposf); | |
| remainf = yposf - (float)yposi; | |
| tablepos = (int)(remainf*(float)SUBPIXEL); | |
| yposi = abs(yposi); | |
| if(yposi==0 && tablepos == 0) | |
| return; // no move required | |
| // -3 , 0 best small notch at zero? | |
| // | |
| if(neg) | |
| { | |
| yposi -= 2; | |
| gainA = gains[tablepos][0]; | |
| gainB = gains[tablepos][1]; | |
| gainC = gains[tablepos][2]; | |
| gainD = gains[tablepos][3]; | |
| } | |
| else | |
| { | |
| yposi -= 1; //offset inherent in the table | |
| gainD = gains[tablepos][0]; | |
| gainC = gains[tablepos][1]; | |
| gainB = gains[tablepos][2]; | |
| gainA = gains[tablepos][3]; | |
| } | |
| gA = _mm_set1_epi16(gainA); | |
| gB = _mm_set1_epi16(gainB); | |
| gC = _mm_set1_epi16(gainC); | |
| gD = _mm_set1_epi16(gainD); | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| skip = 4; | |
| step = 16; | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| skip = 3; | |
| step = 16; | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| skip = 2; | |
| step = 16; | |
| break; | |
| case DECODED_FORMAT_WP13: | |
| case DECODED_FORMAT_RG48: | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_RG64: | |
| default: | |
| skip = 6; | |
| step = 32; | |
| break; | |
| } | |
| // scanline[0] = buffer; | |
| // scanline[1] = buffer + width*skip/2; | |
| // scanline[2] = buffer + width*skip/2*2; | |
| // scanline[3] = buffer + width*skip/2*3; | |
| widthbytes += (step - 1); | |
| widthbytes -= (widthbytes % step); | |
| origwidthextra = (origwidthbytes % step); | |
| scanline[0] = buffer; | |
| scanline[1] = buffer + widthbytes/2; | |
| scanline[2] = buffer + widthbytes/2*2; | |
| scanline[3] = buffer + widthbytes/2*3; | |
| for(y=0; y<4; y++) | |
| { | |
| if(yposi+y >=0 && yposi+y<height) | |
| { | |
| unsigned short *ptr = RGB48; | |
| if(neg) | |
| ptr += (height-1-yposi-y)*spitch; | |
| else | |
| ptr += (yposi+y)*spitch; | |
| memcpy(scanline[y], ptr, origwidthbytes); | |
| } | |
| else | |
| { | |
| memset(scanline[y], 0, origwidthbytes); | |
| } | |
| } | |
| { | |
| for(y=0;y<height; y++) | |
| { | |
| unsigned short *ptr = RGB48; | |
| if(neg) | |
| ptr += (height-y-1)*spitch; | |
| else | |
| ptr += y*spitch; | |
| outline128 = (__m128i *)ptr; | |
| lineA = (__m128i *)scanline[0]; | |
| lineB = (__m128i *)scanline[1]; | |
| lineC = (__m128i *)scanline[2]; | |
| lineD = (__m128i *)scanline[3]; | |
| //for(x=0;x<width*skip/2; x+=step) | |
| for(x=0;x<widthbytes; x+=step) | |
| { | |
| __m128i half; | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_WP13: | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lB = _mm_loadu_si128(lineB++); | |
| lC = _mm_loadu_si128(lineC++); | |
| lD = _mm_loadu_si128(lineD++); | |
| shift = 0; | |
| } | |
| break; | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_RG48: | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| shift = 3; | |
| } | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_YUYV: | |
| lA = _mm_loadu_si128(lineA); | |
| lA = _mm_unpackhi_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128(lineB); | |
| lB = _mm_unpackhi_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128(lineC); | |
| lC = _mm_unpackhi_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128(lineD); | |
| lD = _mm_unpackhi_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| shift = 3; | |
| break; | |
| } | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| if(shift) | |
| { | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| } | |
| else | |
| { | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| } | |
| if(skip == 6) //RGB48 || WP13 | |
| { | |
| if(widthbytes == origwidthbytes || x+16 < origwidthbytes) | |
| _mm_storeu_si128(outline128++, o128); | |
| else | |
| { | |
| //if(x < origwidthbytes+16/*bytes in an SSE2 reg*/) | |
| _mm_storeu_si128((__m128i *)scanline[0], o128); | |
| memcpy((char *)outline128, (char *)scanline[0], origwidthextra); | |
| outline128++; | |
| } | |
| } | |
| else | |
| { | |
| half = o128; | |
| } | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_WP13: | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lB = _mm_loadu_si128(lineB++); | |
| lC = _mm_loadu_si128(lineC++); | |
| lD = _mm_loadu_si128(lineD++); | |
| shift = 0; | |
| } | |
| break; | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_RG48: | |
| { | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| shift = 3; | |
| } | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_YUYV: | |
| lA = _mm_loadu_si128(lineA++); | |
| lA = _mm_unpacklo_epi8 (_mm_setzero_si128(), lA); | |
| lB = _mm_loadu_si128(lineB++); | |
| lB = _mm_unpacklo_epi8 (_mm_setzero_si128(), lB); | |
| lC = _mm_loadu_si128(lineC++); | |
| lC = _mm_unpacklo_epi8 (_mm_setzero_si128(), lC); | |
| lD = _mm_loadu_si128(lineD++); | |
| lD = _mm_unpacklo_epi8 (_mm_setzero_si128(), lD); | |
| lA = _mm_srli_epi16(lA,3); //13-bit unsigned | |
| lB = _mm_srli_epi16(lB,3); //13-bit unsigned | |
| lC = _mm_srli_epi16(lC,3); //13-bit unsigned | |
| lD = _mm_srli_epi16(lD,3); //13-bit unsigned | |
| shift = 3; | |
| break; | |
| } | |
| o128 = _mm_mulhi_epi16(lA, gA); | |
| t1 = _mm_mulhi_epi16(lB, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lC, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = _mm_mulhi_epi16(lD, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| if(shift) | |
| { | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| } | |
| else | |
| { | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| } | |
| if(skip != 6) //!RGB48 || !WP13 | |
| { | |
| half = _mm_srli_epi16(half,8); | |
| o128 = _mm_srli_epi16(o128,8); | |
| o128 = _mm_packus_epi16(o128, half); | |
| } | |
| if(widthbytes == origwidthbytes || x+32 < origwidthbytes) | |
| { | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| else | |
| { | |
| //if(x+16 < origwidthbytes+16) | |
| if(origwidthextra > 16) | |
| { | |
| _mm_storeu_si128((__m128i *)scanline[0], o128); | |
| memcpy((char *)outline128, (char *)scanline[0], origwidthextra - 16); | |
| } | |
| outline128++; | |
| } | |
| } | |
| tline = scanline[0]; | |
| scanline[0] = scanline[1]; | |
| scanline[1] = scanline[2]; | |
| scanline[2] = scanline[3]; | |
| scanline[3] = tline; | |
| if(yposi+y+4 >=0 && yposi+y+4<height) | |
| { | |
| unsigned short *ptr = RGB48; | |
| if(neg) | |
| ptr += (height-1-(yposi+y+4))*spitch; | |
| else | |
| ptr += (yposi+y+4)*spitch; | |
| memcpy(scanline[3], ptr, origwidthbytes); | |
| } | |
| else | |
| { | |
| memset(scanline[3], 0, origwidthbytes); | |
| } | |
| } | |
| } | |
| } | |
| void RGB48HoriShiftZoom(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, int height, int line, float hoffset, float roffset, float zoom, int flip, float frameTilt, int eye) | |
| { | |
| float xposf,xstepf; | |
| int x; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline = (unsigned short *)buffer; | |
| short *sscanline = (short *)buffer; | |
| int neg = 0; | |
| float offset = hoffset; | |
| if(flip) | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| ptrR += (width*3) - 3; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| ptrR -= 6; | |
| } | |
| } | |
| if(eye > 0) | |
| { | |
| zoom *= 1.0f + frameTilt; | |
| } | |
| else | |
| { | |
| zoom /= 1.0f + frameTilt; | |
| } | |
| xposf = (float)width*(0.5f - 1.0f/(2.0f*zoom) - offset); | |
| xposf -= width * roffset * 0.5f / zoom; | |
| xposf += (float)line * ((float)width* roffset / ((float)height*zoom)); | |
| if(xposf < 0.0) | |
| neg = 1; | |
| xstepf = 1.0f/zoom; | |
| memcpy(scanline, RGB48, width*3*2); | |
| { | |
| //unsigned short zeroline[3] = {0}; | |
| int xx = 0; | |
| int ixpos = (int)(xposf * 65536.0f); | |
| int ixstep = (int)(xstepf * 65536.0f); | |
| float xbase = xposf / (float)width; | |
| float xstep = xstepf / (float)width; | |
| float z = (decoder->cfhddata.FrameHDynamic - 1.0f)*2.0f; | |
| // int holdstart = width*5/10; // Use to specify a area of uniform stretch | |
| // int holdend = width*5/10; | |
| int holdstart = (int)((decoder->cfhddata.FrameHDynCenter - decoder->cfhddata.FrameHDynWidth*0.125)*(float)width); | |
| int holdend = (int)((decoder->cfhddata.FrameHDynCenter + decoder->cfhddata.FrameHDynWidth*0.125)*(float)width); | |
| float flatxstep; | |
| float modified_xstep_avg; | |
| float bottomxstep; | |
| float basexstepstart; | |
| float basexstepend; | |
| float range; | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| __m64 overflowprotect = _mm_set1_pi16(0x7fff-0x3fff); | |
| #endif | |
| if(holdstart < 0) holdstart = 0, holdend = (int)((decoder->cfhddata.FrameHDynWidth*0.5)*(float)width); | |
| if(holdend > width) holdend = width, holdstart = (int)((1.0 - decoder->cfhddata.FrameHDynWidth*0.5)*(float)width); | |
| range = (float)(holdend - holdstart); | |
| flatxstep = xstep-z*0.5f*xstep; | |
| modified_xstep_avg = (xstep * (float)width - range * flatxstep) / ((float)width - range); | |
| bottomxstep = modified_xstep_avg - (flatxstep - modified_xstep_avg); | |
| if(holdstart == (width-holdend)) | |
| { | |
| basexstepstart = bottomxstep; | |
| basexstepend = bottomxstep; | |
| } | |
| else if(holdstart < (width-holdend)) | |
| { | |
| float a = (float)holdstart / (float)(width-holdend); | |
| float startavg = a * modified_xstep_avg + (1.0f - a) * flatxstep; | |
| float endavg = (modified_xstep_avg * ((float)width-range) - startavg * (float)holdstart) / (float)(width-holdend); | |
| basexstepstart = startavg - (flatxstep - startavg); | |
| basexstepend = endavg - (flatxstep - endavg); | |
| } | |
| else | |
| { | |
| float a = (float)(width-holdend) / (float)holdstart; | |
| float endavg = a * modified_xstep_avg + (1.0f - a) * flatxstep; | |
| float startavg = (modified_xstep_avg * ((float)width-range) - endavg * (float)(width-holdend)) / (float)holdstart; | |
| basexstepstart = startavg - (flatxstep - startavg); | |
| basexstepend = endavg - (flatxstep - endavg); | |
| } | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13) | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += basexstepstart*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += basexstepend*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| xp = (int)(fxpos * 65536.0f*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1;// was -2 causing a right shift //DAN20100225 | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| if(xp>4 && xp<width-4 && xx < (width-1)*3) //We need 3 values for RGB< yet we write 4, so the last pixel can't be done with MMX | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-1)*3; | |
| src64 = (__m64 *)&sscanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| src64 = (__m64 *)&sscanline[linepos+3]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+6]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+9]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 1); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| #endif | |
| { | |
| int i,r=0,g=0,b=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * sscanline[xp*3]); | |
| g += (gains * sscanline[xp*3+1]); | |
| b += (gains * sscanline[xp*3+2]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| } | |
| xx+=3; | |
| } | |
| } | |
| else | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += basexstepstart*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += basexstepend*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| xp = (int)(fxpos * 65536.0f*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1; // was -2 causing a right shift //DAN20100225 | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| if(xp>4 && xp<width-4) | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-0)*3; //DAN20102602 -- fix left edge error. | |
| src64 = (__m64 *)&scanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| src64 = (__m64 *)&scanline[linepos+3]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+6]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+9]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 2); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| #endif | |
| { | |
| int i,r=0,g=0,b=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * scanline[xp*3]); | |
| g += (gains * scanline[xp*3+1]); | |
| b += (gains * scanline[xp*3+2]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| } | |
| xx+=3; | |
| } | |
| } | |
| } | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| //_mm_empty(); | |
| #endif | |
| } | |
| #if 0 //Why is this not used? | |
| void RGB48HoriShiftZoomFine(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, int height, int line, float hoffset, float roffset, float zoom, int flip, float frameTilt, int eye) | |
| { | |
| float xposf,remainf,xstepf; | |
| int xposi,tablepos,x; | |
| int Ra,Rb,Rc,Rd; | |
| int Ga,Gb,Gc,Gd; | |
| int Ba,Bb,Bc,Bd; | |
| int gainA,gainB,gainC,gainD; | |
| int endofSSEline = 0; | |
| unsigned short *scanline = (unsigned short *)buffer; | |
| short *sscanline = (short *)buffer; | |
| int neg = 0,shift = 0; | |
| float offset = hoffset; | |
| __m128i l1,l2,l3,gA,gB,gC,gD,o128,t1,t2; | |
| __m128i *line128, *outline128; | |
| if(flip) | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| ptrR += (width*3) - 3; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| ptrR -= 6; | |
| } | |
| } | |
| if(eye > 0) | |
| { | |
| zoom *= 1.0 + frameTilt; | |
| } | |
| else | |
| { | |
| zoom /= 1.0 + frameTilt; | |
| } | |
| xposf = (float)width*(0.5 - 1.0/(2.0*zoom) - offset); | |
| xposf -= width * roffset * 0.5 / zoom; | |
| xposf += (float)line * ((float)width* roffset / ((float)height*zoom)); | |
| if(xposf < 0.0) | |
| neg = 1; | |
| xstepf = 1.0/zoom; | |
| memcpy(scanline, RGB48, width*3*2); | |
| { | |
| unsigned short zeroline[3] = {0}; | |
| int xx = 0; | |
| int ixpos = xposf * 65536.0; | |
| int ixstep = xstepf * 65536.0; | |
| float xbase = xposf / (float)width; | |
| float xstep = xstepf / (float)width; | |
| float z = (decoder->cfhddata.FrameHDynamic - 1.0)*2.0; | |
| int holdstart = width*5/10; // Use to specify a area of uniform stretch | |
| int holdend = width*5/10; | |
| float flatxstep = xstep-z*0.5*xstep; | |
| float modified_xstep_avg = (xstep * (float)width - (float)(holdend - holdstart) * flatxstep) / (float)(width - (holdend - holdstart)); | |
| float bottomxstep = modified_xstep_avg - (flatxstep- modified_xstep_avg); | |
| __m64 overflowprotect = _mm_set1_pi16(0x7fff-0x3fff); | |
| if(bottomxstep < 0.0) | |
| { | |
| bottomxstep = 0.0; | |
| flatxstep = modified_xstep_avg + modified_xstep_avg; | |
| } | |
| if(flatxstep < 0.0) | |
| { | |
| flatxstep = 0.0; | |
| bottomxstep = modified_xstep_avg - (flatxstep- modified_xstep_avg); | |
| } | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13) | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += bottomxstep*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += bottomxstep*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| /* fxpos = xbase + xstep * x;//(float)ixpos/(65536.0*(float)width); | |
| if(fxpos >= 0.0 && fxpos <= 1.0) | |
| { | |
| if(z > 0.0) | |
| { | |
| fxpos = 1.8*fxpos - 2.4*fxpos*fxpos + (1.6*fxpos*fxpos*fxpos); | |
| fxpos = fxpos * (z) + (xbase + xstep * x) * (1.0-z); | |
| } | |
| else | |
| { | |
| fxpos = 3.0*fxpos*fxpos - 2.0*fxpos*fxpos*fxpos; | |
| fxpos = fxpos * (-z) + (xbase + xstep * x) * (1.0+z); | |
| } | |
| } | |
| */ | |
| xp = (fxpos * 65536.0*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1;// was -2 causing a right shift //DAN20100225 | |
| if(xp>4 && xp<width-4) | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-1)*3; | |
| src64 = (__m64 *)&sscanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| src64 = (__m64 *)&sscanline[linepos+3]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+6]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+9]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 1); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| { | |
| int i,t,r=0,g=0,b=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| /* if(i == 3) //DAN20101112 this code was crashing disparity zoom | |
| { | |
| gains = lanczos[rmdr]>>1; | |
| r += (gains * sscanline[(xp-1)*3]); | |
| g += (gains * sscanline[(xp-1)*3+1]); | |
| b += (gains * sscanline[(xp-1)*3+2]); | |
| } | |
| else */ | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * sscanline[xp*3]); | |
| g += (gains * sscanline[xp*3+1]); | |
| b += (gains * sscanline[xp*3+2]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| } | |
| xx+=3; | |
| } | |
| } | |
| else | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += bottomxstep*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += bottomxstep*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| /* fxpos = xbase + xstep * x;//(float)ixpos/(65536.0*(float)width); | |
| if(fxpos >= 0.0 && fxpos <= 1.0) | |
| { | |
| if(z > 0.0) | |
| { | |
| fxpos = 1.8*fxpos - 2.4*fxpos*fxpos + (1.6*fxpos*fxpos*fxpos); | |
| fxpos = fxpos * (z) + (xbase + xstep * x) * (1.0-z); | |
| } | |
| else | |
| { | |
| fxpos = 3.0*fxpos*fxpos - 2.0*fxpos*fxpos*fxpos; | |
| fxpos = fxpos * (-z) + (xbase + xstep * x) * (1.0+z); | |
| } | |
| } | |
| */ | |
| xp = (fxpos * 65536.0*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1; // was -2 causing a right shift //DAN20100225 | |
| if(xp>4 && xp<width-4) | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-0)*3; //DAN20102602 -- fix left edge error. | |
| src64 = (__m64 *)&scanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| src64 = (__m64 *)&scanline[linepos+3]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+6]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+9]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 2); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| { | |
| int i,t,r=0,g=0,b=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| /* if(i == 3) //DAN20101112 this code was crashing disparity zoom | |
| { | |
| gains = lanczos[rmdr]>>1; | |
| r += (gains * scanline[(xp-1)*3]); | |
| g += (gains * scanline[(xp-1)*3+1]); | |
| b += (gains * scanline[(xp-1)*3+2]); | |
| } | |
| else */ | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * scanline[xp*3]); | |
| g += (gains * scanline[xp*3+1]); | |
| b += (gains * scanline[xp*3+2]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| } | |
| xx+=3; | |
| } | |
| } | |
| } | |
| /* | |
| memcpy(scanline, RGB48, width*3*2); | |
| { | |
| for(x=0;x<width*3; x+=3) //RGB | |
| { | |
| int r,g,b,xp = ((int)xposf)*3; | |
| xposf += xstepf; | |
| if(xp<0 || xp>= width*3) | |
| { | |
| RGB48[x] = 0; | |
| RGB48[x+1] = 0; | |
| RGB48[x+2] = 0; | |
| } | |
| else | |
| { | |
| r = scanline[xp]; | |
| g = scanline[xp+1]; | |
| b = scanline[xp+2]; | |
| RGB48[x] = r; | |
| RGB48[x+1] = g; | |
| RGB48[x+2] = b; | |
| } | |
| } | |
| } | |
| */ | |
| //_mm_empty(); | |
| } | |
| #endif | |
| void RGBA64HoriShiftZoom(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, int height, int line, float hoffset, float roffset, float zoom, int flip, float frameTilt, int eye) | |
| { | |
| float xposf,xstepf; | |
| int x; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline = (unsigned short *)buffer; | |
| short *sscanline = (short *)buffer; | |
| int neg = 0; | |
| float offset = hoffset; | |
| if(flip) | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| ptrR += (width*4) - 4; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| t = *ptrL; | |
| *ptrL++ = *ptrR; | |
| *ptrR++ = t; | |
| ptrR -= 4; | |
| } | |
| } | |
| if(eye > 0) | |
| { | |
| zoom *= 1.0f + frameTilt; | |
| } | |
| else | |
| { | |
| zoom /= 1.0f + frameTilt; | |
| } | |
| xposf = (float)width*(0.5f - 1.0f/(2.0f*zoom) - offset); | |
| xposf -= width * roffset * 0.5f; | |
| xposf += line * (width* roffset / ((float)height*zoom)); | |
| if(xposf < 0.0) | |
| neg = 1; | |
| xstepf = 1.0f/zoom; | |
| memcpy(scanline, RGB48, width*4*2); | |
| { | |
| //unsigned short zeroline[3] = {0}; | |
| int xx = 0; | |
| int ixpos = (int)(xposf * 65536.0f); | |
| int ixstep = (int)(xstepf * 65536.0f); | |
| float xbase = xposf / (float)width; | |
| float xstep = xstepf / (float)width; | |
| float z = (decoder->cfhddata.FrameHDynamic - 1.0f)*2.0f; | |
| int holdstart = width*5/10; // Use to specify a area of uniform stretch | |
| int holdend = width*5/10; | |
| float flatxstep = xstep-z*0.5f*xstep; | |
| float modified_xstep_avg = (xstep * (float)width - (float)(holdend - holdstart) * flatxstep) / (float)(width - (holdend - holdstart)); | |
| float bottomxstep = modified_xstep_avg - (flatxstep- modified_xstep_avg); | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| __m64 overflowprotect = _mm_set1_pi16(0x7fff-0x3fff); | |
| #endif | |
| if(bottomxstep < 0.0) | |
| { | |
| bottomxstep = 0.0; | |
| flatxstep = modified_xstep_avg + modified_xstep_avg; | |
| } | |
| if(flatxstep < 0.0) | |
| { | |
| flatxstep = 0.0; | |
| bottomxstep = modified_xstep_avg - (flatxstep- modified_xstep_avg); | |
| } | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_W13A) | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += bottomxstep*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += bottomxstep*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| xp = (int)(fxpos * 65536.0f*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1;// was -2 causing a right shift //DAN20100225 | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| if(xp>4 && xp<width-4) | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-1)*4; | |
| src64 = (__m64 *)&sscanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| src64 = (__m64 *)&sscanline[linepos+4]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+8]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&sscanline[linepos+12]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //13*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 1); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| #endif | |
| { | |
| int i,r=0,g=0,b=0,a=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * sscanline[xp*4]); | |
| g += (gains * sscanline[xp*4+1]); | |
| b += (gains * sscanline[xp*4+2]); | |
| a += (gains * sscanline[xp*4+3]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| a >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| if(a<0) a=0; else if(a>65535) a=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| RGB48[xx+3] = a; | |
| } | |
| xx+=4; | |
| } | |
| } | |
| else | |
| { | |
| float fxpos = xbase; | |
| for(x=0;x<width; x++) //RGB | |
| { | |
| int gains = 0; | |
| int xp, rmdr; | |
| if(z != 0.0) | |
| { | |
| if(x<holdstart) | |
| { | |
| fxpos += bottomxstep*((float)(holdstart-x)/(float)holdstart) + flatxstep*((float)x/(float)holdstart); | |
| } | |
| else if(x>holdend) | |
| { | |
| int diff = width - x; | |
| int range = width - holdend; | |
| fxpos += bottomxstep*((float)(range-diff)/(float)range) + flatxstep*((float)(diff)/(float)range); | |
| } | |
| else | |
| { | |
| fxpos += flatxstep; | |
| } | |
| xp = (int)(fxpos * 65536.0f*(float)width); | |
| rmdr = 63-((xp>>10) & 63); | |
| xp >>= 16; | |
| } | |
| else | |
| { | |
| xp = ixpos>>16; | |
| rmdr = 63-((ixpos>>10) & 63); | |
| ixpos += ixstep; | |
| } | |
| xp -= 1; // was -2 causing a right shift //DAN20100225 | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| if(xp>4 && xp<width-4) | |
| { | |
| __m64 *src64; | |
| __m64 *dst64; | |
| __m64 sumx16; | |
| __m64 rgbx16; | |
| __m64 gain16; | |
| int linepos = (xp-0)*4; //DAN20102602 -- fix left edge error. | |
| src64 = (__m64 *)&scanline[linepos]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| sumx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| src64 = (__m64 *)&scanline[linepos+4]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+64]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+8]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+128]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| src64 = (__m64 *)&scanline[linepos+12]; | |
| rgbx16 = *src64; | |
| gain16 = _mm_set1_pi16(lanczos[rmdr+192]); //15-bit | |
| rgbx16 = _mm_srli_pi16(rgbx16, 1); //15-bit | |
| rgbx16 = _mm_mulhi_pi16(rgbx16, gain16); //15*15-bit | |
| sumx16 = _mm_adds_pi16(sumx16, rgbx16); | |
| sumx16 = _mm_adds_pi16(sumx16, overflowprotect); | |
| sumx16 = _mm_subs_pu16(sumx16, overflowprotect); | |
| sumx16 = _mm_slli_pi16(sumx16, 2); | |
| dst64 = (__m64 *)&RGB48[xx]; | |
| *dst64 = sumx16; | |
| } | |
| else | |
| #endif | |
| { | |
| int i,r=0,g=0,b=0,a=0; | |
| for(i=0; i<4; i++) | |
| { | |
| if(xp<=0 || xp>= width) | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| } | |
| else | |
| { | |
| gains += lanczos[rmdr]>>1; | |
| r += (gains * scanline[xp*4]); | |
| g += (gains * scanline[xp*4+1]); | |
| b += (gains * scanline[xp*4+2]); | |
| a += (gains * scanline[xp*4+3]); | |
| gains = 0; | |
| } | |
| xp++; | |
| rmdr+=64; | |
| } | |
| r >>= 14; | |
| g >>= 14; | |
| b >>= 14; | |
| a >>= 14; | |
| if(r<0) r=0; else if(r>65535) r=65535; | |
| if(g<0) g=0; else if(g>65535) g=65535; | |
| if(b<0) b=0; else if(b>65535) b=65535; | |
| if(a<0) a=0; else if(a>65535) a=65535; | |
| RGB48[xx] = r; | |
| RGB48[xx+1] = g; | |
| RGB48[xx+2] = b; | |
| RGB48[xx+3] = a; | |
| } | |
| xx+=4; | |
| } | |
| } | |
| } | |
| #if MMXSUPPORTED //TODO DANREMOVE | |
| //_mm_empty(); | |
| #endif | |
| } | |
| void RGB48WindowMask(DECODER *decoder, unsigned short *RGB48, int width, int channel, float windowMask) | |
| { | |
| float line = (float)width * fabsf(windowMask); | |
| int pixelbytes = 6; | |
| float frac = (float)(line-(float)((int)line)); | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_W13A: | |
| case DECODED_FORMAT_RG64: | |
| pixelbytes = 8; | |
| break; | |
| } | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_W13A || | |
| decoder->StereoBufferFormat == DECODED_FORMAT_WP13) // signed math needed | |
| { | |
| short *ptrL = (short *)RGB48; | |
| short *ptrR = (short *)RGB48; | |
| if(windowMask < 0) | |
| channel = channel == 0 ? 1 : 0; | |
| if(pixelbytes == 6) | |
| { | |
| if(channel == 0) | |
| { | |
| memset(ptrL, 0, 6*(int)line); | |
| ptrL += ((int)line*3); | |
| ptrL[0] = (int)((float)ptrL[0] * (1.0-frac)); | |
| ptrL[1] = (int)((float)ptrL[1] * (1.0-frac)); | |
| ptrL[2] = (int)((float)ptrL[2] * (1.0-frac)); | |
| } | |
| else | |
| { | |
| ptrR += ((width-(int)line)*3); | |
| memset(ptrR, 0, 6*(int)line); | |
| ptrR[-1] = (int)((float)ptrR[-1] * (1.0-frac)); | |
| ptrR[-2] = (int)((float)ptrR[-2] * (1.0-frac)); | |
| ptrR[-3] = (int)((float)ptrR[-3] * (1.0-frac)); | |
| } | |
| } | |
| else | |
| { | |
| if(channel == 0) | |
| { | |
| memset(ptrL, 0, 8*(int)line); | |
| ptrL += ((int)line*4); | |
| ptrL[0] = (int)((float)ptrL[0] * (1.0-frac)); | |
| ptrL[1] = (int)((float)ptrL[1] * (1.0-frac)); | |
| ptrL[2] = (int)((float)ptrL[2] * (1.0-frac)); | |
| ptrL[3] = (int)((float)ptrL[3] * (1.0-frac)); | |
| } | |
| else | |
| { | |
| ptrR += ((width-(int)line)*4); | |
| memset(ptrR, 0, 8*(int)line); | |
| ptrR[-1] = (int)((float)ptrR[-1] * (1.0-frac)); | |
| ptrR[-2] = (int)((float)ptrR[-2] * (1.0-frac)); | |
| ptrR[-3] = (int)((float)ptrR[-3] * (1.0-frac)); | |
| ptrR[-4] = (int)((float)ptrR[-4] * (1.0-frac)); | |
| } | |
| } | |
| } | |
| else | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| if(windowMask < 0) | |
| channel = channel == 0 ? 1 : 0; | |
| if(pixelbytes == 6) | |
| { | |
| if(channel == 0) | |
| { | |
| memset(ptrL, 0, 6*(int)line); | |
| ptrL += ((int)line*3); | |
| ptrL[0] = (int)((float)ptrL[0] * (1.0-frac)); | |
| ptrL[1] = (int)((float)ptrL[1] * (1.0-frac)); | |
| ptrL[2] = (int)((float)ptrL[2] * (1.0-frac)); | |
| } | |
| else | |
| { | |
| ptrR += ((width-(int)line)*3); | |
| memset(ptrR, 0, 6*(int)line); | |
| ptrR[-1] = (int)((float)ptrR[-1] * (1.0-frac)); | |
| ptrR[-2] = (int)((float)ptrR[-2] * (1.0-frac)); | |
| ptrR[-3] = (int)((float)ptrR[-3] * (1.0-frac)); | |
| } | |
| } | |
| else | |
| { | |
| if(channel == 0) | |
| { | |
| memset(ptrL, 0, 8*(int)line); | |
| ptrL += ((int)line*4); | |
| ptrL[0] = (int)((float)ptrL[0] * (1.0-frac)); | |
| ptrL[1] = (int)((float)ptrL[1] * (1.0-frac)); | |
| ptrL[2] = (int)((float)ptrL[2] * (1.0-frac)); | |
| ptrL[3] = (int)((float)ptrL[3] * (1.0-frac)); | |
| } | |
| else | |
| { | |
| ptrR += ((width-(int)line)*4); | |
| memset(ptrR, 0, 8*(int)line); | |
| ptrR[-1] = (int)((float)ptrR[-1] * (1.0-frac)); | |
| ptrR[-2] = (int)((float)ptrR[-2] * (1.0-frac)); | |
| ptrR[-3] = (int)((float)ptrR[-3] * (1.0-frac)); | |
| ptrR[-4] = (int)((float)ptrR[-4] * (1.0-frac)); | |
| } | |
| } | |
| } | |
| } | |
/*!
 * @brief Horizontally shift one RGB48 (3 x 16-bit components) scanline in place
 * by a signed fractional pixel offset.
 *
 * The line is optionally mirrored, then copied into 'buffer' with zero padding
 * pre-shifted by the whole-pixel part of the offset; the subpixel part is
 * applied with a 4-tap filter whose weights come from gains[tablepos][0..3].
 * The SSE2 loop processes 8 components (2 2/3 pixels) per iteration, using
 * byte shifts to realign the 3-component pixels across the 8-lane registers.
 * 'buffer' must be large enough for the line plus the padding written below.
 * Returns early (leaving RGB48 untouched) when the offset rounds to zero.
 */
void RGB48HoriShift(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, float offset, int flip)
{
	float xposf,remainf;
	int xposi,tablepos,x;
	int gainA,gainB,gainC,gainD;
	//int endofSSEline = 0;
	unsigned short *scanline = (unsigned short *)buffer;
	int neg = 0,shift = 0;
	__m128i l1,l2,l3,gA,gB,gC,gD,o128,t1,t2;
	__m128i *line128, *outline128;
	// Optional horizontal mirror (3 components per pixel).
	if(flip)
	{
		unsigned short *ptrL = RGB48;
		unsigned short *ptrR = RGB48;
		ptrR += (width*3) - 3;
		for(x=0;x<width/2;x++)
		{
			int t1,t2,t3;
			t1 = ptrL[0];
			ptrL[0] = ptrR[0];
			ptrR[0] = t1;
			t2 = ptrL[1];
			ptrL[1] = ptrR[1];
			ptrR[1] = t2;
			t3 = ptrL[2];
			ptrL[2] = ptrR[2];
			ptrR[2] = t3;
			ptrL += 3;
			ptrR -= 3;
		}
	}
	if(offset < 0.0)
		neg = 1;
	// Split the shift into whole pixels (xposi) and a subpixel table index.
	xposf = width * offset;
	xposi = (int)floorf(xposf);
	remainf = xposf - (float)xposi;
	tablepos = (int)(remainf*(float)SUBPIXEL);
	xposi = abs(xposi);
	if(xposi==0 && tablepos == 0)
		return; // no move required
	// 4-tap filter weights for this subpixel phase.
	gainA = gains[tablepos][0];
	gainB = gains[tablepos][1];
	gainC = gains[tablepos][2];
	gainD = gains[tablepos][3];
	if(neg == 0)
	{
		// Positive shift: prepend xposi+2 black pixels, then the source line,
		// then 16 black pixels of read-ahead padding for the SIMD loop.
		unsigned short *ptr = scanline;
		int nwidth = width-xposi+16;
		if(nwidth > width)
			nwidth = width;
		for(x=0;x<xposi+2;x++)
		{
			*ptr++ = 0;//r
			*ptr++ = 0;//g
			*ptr++ = 0;//b
		}
		memcpy(ptr, RGB48, (nwidth)*3*2);
		ptr += (nwidth)*3;
		for(x=0;x<16;x++)
		{
			*ptr++ = 0;//r
			*ptr++ = 0;//g
			*ptr++ = 0;//b
		}
	}
	else
	{
		// Negative shift: start the copy xposi pixels into the source (with a
		// 2-pixel filter lead-in), then pad the tail with black pixels.
		unsigned short *ptr = scanline;
		for(x=0;x<2;x++)
		{
			if(x+xposi-2>=0)
			{
				*ptr++ = RGB48[(x+xposi-2)*3];//r
				*ptr++ = RGB48[(x+xposi-2)*3+1];//g
				*ptr++ = RGB48[(x+xposi-2)*3+2];//b
			}
			else
			{
				*ptr++ = 0;//r
				*ptr++ = 0;//g
				*ptr++ = 0;//b
			}
		}
		memcpy(ptr, &RGB48[xposi*3], (width-xposi)*3*2);
		ptr += (width-xposi)*3;
		for(x=0;x<xposi+16;x++)
		{
			*ptr++ = 0;//r
			*ptr++ = 0;//g
			*ptr++ = 0;//b
		}
	}
	gA = _mm_set1_epi16(gainA);
	gB = _mm_set1_epi16(gainB);
	gC = _mm_set1_epi16(gainC);
	gD = _mm_set1_epi16(gainD);
	line128 = (__m128i *)&scanline[0];
	//outline128 = line128;
	outline128 = (__m128i *)&RGB48[0];
	// Prime three registers; pixels straddle register boundaries because each
	// pixel is 3 components while a register holds 8.
	//l1 = load128;//r1,g1,b1,r2,g2,b2,r3,g3,
	//l2 = load128;//b3,r4,g4,b4,r5,g5,b5,r6
	//l3 = load128;//g6,b6,r7,g7,b7,r8,g8,b8
	if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13)
	{
		// WP13 data is already signed 13-bit; no pre-shift needed.
		l1 = _mm_loadu_si128(line128++);
		l2 = _mm_loadu_si128(line128++);
		l3 = _mm_loadu_si128(line128++);
		shift = 0;
	}
	else
	{
		// Unsigned 16-bit data: pre-shift to 13 bits so the signed
		// multiply-high cannot misinterpret the top bit.
		l1 = _mm_loadu_si128(line128++);
		l1 = _mm_srli_epi16(l1,3); //13-bit unsigned
		l2 = _mm_loadu_si128(line128++);
		l2 = _mm_srli_epi16(l2,3); //13-bit unsigned
		l3 = _mm_loadu_si128(line128++);
		l3 = _mm_srli_epi16(l3,3); //13-bit unsigned
		shift = 3;
	}
	for(x=0;x<width*3; x+=8)
	{
		//o=l1* gainA
		o128 = _mm_mulhi_epi16(l1, gA);
		// Realign by one pixel (3 components) and apply the second tap.
		//t1 = l1<<3*16 //t1 = r2,g2,b2,r3,g3, 0 0 0
		//t2 = l2>>16*5 //t2 = 0 0 0 0 0 b3,r4,g4
		//t1 += t2; //t1 = r2,g2,b2,r3,g3,b3,r4,g4
		//l1 = t1 //l1 = r2,g2,b2,r3,g3,b3,r4,g4
		//t1 *= gainB
		//o += t1
		t1 = _mm_srli_si128(l1,3*2);
		t2 = _mm_slli_si128(l2,5*2);
		t1 = _mm_adds_epi16(t1,t2);
		l1 = t1;
		t1 = _mm_mulhi_epi16(t1, gB);
		o128 = _mm_adds_epi16(o128,t1);
		// Realign by another pixel and apply the third tap.
		//t1 = l1<<3*16 //t1 = r3,g3,b3,r4,g4 0 0 0
		//t2 = l2<<3*16; //t2 = b4,r5,g5,b5,r6 0 0 0
		//t2 >>= 5*16; //t2 = 0 0 0 0 0 b4,r5,g5
		//t1 += t2 //t1 = r3,g3,b3,r4,g4,b4,r5,g5
		//l1 = t1 //l1 = r3,g3,b3,r4,g4,b4,r5,g5
		//t1 *= gainC
		//o += t1
		t1 = _mm_srli_si128(l1,3*2);
		t2 = _mm_srli_si128(l2,3*2);
		t2 = _mm_slli_si128(t2,5*2);
		t1 = _mm_adds_epi16(t1,t2);
		l1 = t1;
		t1 = _mm_mulhi_epi16(t1, gC);
		o128 = _mm_adds_epi16(o128,t1);
		// Realign once more (components now span l2 and l3) and apply tap four.
		//t1 = l1<<3*16 //t1 = r4,g4,b4,r5,g5 0 0 0
		//t2 = l2<<6*16 //t2 = b5,r6 0 0 0 0 0 0
		//t2 >>= 5 * 16; //t2 = 0 0 0 0 0 b5,r6 0
		//t1 += t2 //t1 = r4,g4,b4,r5,g5,b5,r6, 0
		//t2 = l3>>7*16 //t2 = 0 0 0 0 0 0 0 g6
		//t1 += t2 //t1 = r4,g4,b4,r5,g5,b5,r6,g6
		//t1 *= gainD
		//o += t1
		t1 = _mm_srli_si128(l1,3*2);
		t2 = _mm_srli_si128(l2,6*2);
		t2 = _mm_slli_si128(t2,5*2);
		t1 = _mm_adds_epi16(t1,t2);
		t2 = _mm_slli_si128(l3,7*2);
		t1 = _mm_adds_epi16(t1,t2);
		t1 = _mm_mulhi_epi16(t1, gD);
		o128 = _mm_adds_epi16(o128,t1);
		// Slide the three-register window forward by one register.
		l1 = l2;
		l2 = l3;
		l3 = _mm_loadu_si128(line128++);
		if(shift)
		{
			// Clamp negatives via saturating add/sub, then restore 16-bit scale.
			l3 = _mm_srli_epi16(l3,3); //13-bit unsigned
			o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff));
			o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff));
			o128 = _mm_slli_epi16(o128,4);
		}
		else
		{
			// upper limit to 32767
			o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff));
			o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff));
			o128 = _mm_slli_epi16(o128,1);
		}
		_mm_storeu_si128(outline128++, o128);
	}
}
| void RGBA64HoriShift(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, float offset, int flip) | |
| { | |
| float xposf,remainf; | |
| int xposi,tablepos,x; | |
| int gainA,gainB,gainC,gainD; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline = (unsigned short *)buffer; | |
| int neg = 0,shift = 0; | |
| __m128i l1,l2,l3,gA,gB,gC,gD,o128,t1,t2; | |
| __m128i *line128, *outline128; | |
| if(flip) | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| ptrR += (width*4) - 4; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t1,t2,t3,t4; | |
| t1 = ptrL[0]; | |
| ptrL[0] = ptrR[0]; | |
| ptrR[0] = t1; | |
| t2 = ptrL[1]; | |
| ptrL[1] = ptrR[1]; | |
| ptrR[1] = t2; | |
| t3 = ptrL[2]; | |
| ptrL[2] = ptrR[2]; | |
| ptrR[2] = t3; | |
| t4 = ptrL[2]; | |
| ptrL[3] = ptrR[3]; | |
| ptrR[3] = t4; | |
| ptrL += 4; | |
| ptrR -= 4; | |
| } | |
| } | |
| if(offset < 0.0) | |
| neg = 1; | |
| xposf = width * offset; | |
| xposi = (int)floorf(xposf); | |
| remainf = xposf - (float)xposi; | |
| tablepos = (int)(remainf*(float)SUBPIXEL); | |
| xposi = abs(xposi); | |
| if(xposi==0 && tablepos == 0) | |
| return; // no move required | |
| gainA = gains[tablepos][0]; | |
| gainB = gains[tablepos][1]; | |
| gainC = gains[tablepos][2]; | |
| gainD = gains[tablepos][3]; | |
| if(neg == 0) | |
| { | |
| unsigned short *ptr = scanline; | |
| int nwidth = width-xposi+16; | |
| if(nwidth > width) | |
| nwidth = width; | |
| for(x=0;x<xposi+2;x++) | |
| { | |
| *ptr++ = 0;//r | |
| *ptr++ = 0;//g | |
| *ptr++ = 0;//b | |
| *ptr++ = 0;//a | |
| } | |
| memcpy(ptr, RGB48, (nwidth)*4*2); | |
| ptr += (nwidth)*4; | |
| for(x=0;x<16;x++) | |
| { | |
| *ptr++ = 0;//r | |
| *ptr++ = 0;//g | |
| *ptr++ = 0;//b | |
| *ptr++ = 0;//a | |
| } | |
| } | |
| else | |
| { | |
| unsigned short *ptr = scanline; | |
| for(x=0;x<2;x++) | |
| { | |
| if(x+xposi-2>=0) | |
| { | |
| *ptr++ = RGB48[(x+xposi-2)*4];//r | |
| *ptr++ = RGB48[(x+xposi-2)*4+1];//g | |
| *ptr++ = RGB48[(x+xposi-2)*4+2];//b | |
| *ptr++ = RGB48[(x+xposi-2)*4+3];//a | |
| } | |
| else | |
| { | |
| *ptr++ = 0;//r | |
| *ptr++ = 0;//g | |
| *ptr++ = 0;//b | |
| *ptr++ = 0;//a | |
| } | |
| } | |
| memcpy(ptr, &RGB48[xposi*4], (width-xposi)*4*2); | |
| ptr += (width-xposi)*4; | |
| for(x=0;x<xposi+16;x++) | |
| { | |
| *ptr++ = 0;//r | |
| *ptr++ = 0;//g | |
| *ptr++ = 0;//b | |
| *ptr++ = 0;//a | |
| } | |
| } | |
| gA = _mm_set1_epi16(gainA); | |
| gB = _mm_set1_epi16(gainB); | |
| gC = _mm_set1_epi16(gainC); | |
| gD = _mm_set1_epi16(gainD); | |
| line128 = (__m128i *)&scanline[0]; | |
| //outline128 = line128; | |
| outline128 = (__m128i *)&RGB48[0]; | |
| //l1 = load128;//r1,g1,b1,a1,r2,g2,b2,a2, | |
| //l2 = load128;//r3,g3,b3,a3,r4,g4,b4,a4, | |
| //l3 = load128;//r5,g5,b5,a5,r6,g6,b6,a6, | |
| //l4 = load128;//r7,g7,b7,a7,r8,g8,b8,a8, | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13 || decoder->StereoBufferFormat == DECODED_FORMAT_W13A) | |
| { | |
| l1 = _mm_loadu_si128(line128++); | |
| l2 = _mm_loadu_si128(line128++); | |
| l3 = _mm_loadu_si128(line128++); | |
| shift = 0; | |
| } | |
| else | |
| { | |
| l1 = _mm_loadu_si128(line128++); | |
| l1 = _mm_srli_epi16(l1,3); //13-bit unsigned | |
| l2 = _mm_loadu_si128(line128++); | |
| l2 = _mm_srli_epi16(l2,3); //13-bit unsigned | |
| l3 = _mm_loadu_si128(line128++); | |
| l3 = _mm_srli_epi16(l3,3); //13-bit unsigned | |
| shift = 3; | |
| } | |
| for(x=0;x<width*4; x+=8) | |
| { | |
| //o=l1* gainA | |
| o128 = _mm_mulhi_epi16(l1, gA); | |
| //t1 = l1<<4*16 //t1 = r2,g2,b2,a2,0, 0 0 0 | |
| //t2 = l2>>4*16 //t2 = 0 0 0 0 r3,g3,b3,a4 | |
| //t1 += t2; //t1 = r2,g2,b2,a2,r3,g3,b3,a4 | |
| //l1 = t1 //l1 = r2,g2,b2,a2,r3,g3,b3,a4 | |
| //t1 *= gainB | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,4*2); | |
| t2 = _mm_slli_si128(l2,4*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| l1 = t1; | |
| t1 = _mm_mulhi_epi16(t1, gB); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| //t1 = l1<<4*16 //t1 = r3,g3,b3,a3, 0 0 0 0 | |
| //t2 = l2<<4*16;//t2 = r4,g4,b4,a4, 0 0 0 0 | |
| //t2 >>= 4*16; //t2 = 0 0 0 0 r4,g4,b4,a4 | |
| //t1 += t2 //t1 = r3,g3,b3,a4,r4,g4,b4,a4 | |
| //l1 = t1 //l1 = r3,g3,b3,a4,r4,g4,b4,a4 | |
| //t1 *= gainC | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,4*2); | |
| t2 = _mm_srli_si128(l2,4*2); | |
| t2 = _mm_slli_si128(t2,4*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| l1 = t1; | |
| t1 = _mm_mulhi_epi16(t1, gC); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| //t1 = l1<<4*16 //t1 = r4,g4,b4,a4,0 0 0 0 | |
| //t2 = l3>>4*16 //t2 = 0 0 0 0 r5,g5,b5,a5 | |
| //t1 += t2 //t1 = r4,g4,b4,a4,r5,g5,b5,a5 | |
| //t1 *= gainD | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,4*2); | |
| t2 = _mm_slli_si128(l3,4*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| t1 = _mm_mulhi_epi16(t1, gD); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| l1 = l2; | |
| l2 = l3; | |
| l3 = _mm_loadu_si128(line128++); | |
| if(shift) | |
| { | |
| l3 = _mm_srli_epi16(l3,3); //13-bit unsigned | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| } | |
| else | |
| { | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| } | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| } | |
| void RGB48HoriShiftAnaglyph(DECODER *decoder, unsigned short *RGB48, unsigned short *buffer, int width, | |
| float offsetR, float offsetG, float offsetB , | |
| int flipR, int flipG, int flipB) | |
| { | |
| float Rxposf,Rremainf; | |
| int Rxposi,Rtablepos; | |
| float Gxposf,Gremainf; | |
| int Gxposi,Gtablepos; | |
| float Bxposf,Bremainf; | |
| int Bxposi,Btablepos; | |
| int x; | |
| int RgainA,RgainB,RgainC,RgainD; | |
| int GgainA,GgainB,GgainC,GgainD; | |
| int BgainA,BgainB,BgainC,BgainD; | |
| //int endofSSEline = 0; | |
| unsigned short *scanline = (unsigned short *)buffer; | |
| int negR = 0; | |
| int negG = 0; | |
| int negB = 0; | |
| int shift = 0; | |
| __m128i l1,l2,l3,o128,t1,t2; | |
| __m128i *line128, *outline128; | |
| __m128i gA1,gB1,gC1,gD1,gA2,gB2,gC2,gD2,gA3,gB3,gC3,gD3; | |
| if(flipR) | |
| { | |
| unsigned short *ptrL = RGB48; | |
| unsigned short *ptrR = RGB48; | |
| ptrR += (width*3) - 3; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL = *ptrR; | |
| *ptrR = t; | |
| ptrL += 3; | |
| ptrR -= 3; | |
| } | |
| } | |
| if(flipG) | |
| { | |
| unsigned short *ptrL = &RGB48[1]; | |
| unsigned short *ptrR = &RGB48[1]; | |
| ptrR += (width*3) - 3; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL = *ptrR; | |
| *ptrR = t; | |
| ptrL += 3; | |
| ptrR -= 3; | |
| } | |
| } | |
| if(flipB) | |
| { | |
| unsigned short *ptrL = &RGB48[2]; | |
| unsigned short *ptrR = &RGB48[2]; | |
| ptrR += (width*3) - 3; | |
| for(x=0;x<width/2;x++) | |
| { | |
| int t; | |
| t = *ptrL; | |
| *ptrL = *ptrR; | |
| *ptrR = t; | |
| ptrL += 3; | |
| ptrR -= 3; | |
| } | |
| } | |
| if(offsetR < 0.0) | |
| negR = 1; | |
| if(offsetG < 0.0) | |
| negG = 1; | |
| if(offsetB < 0.0) | |
| negB = 1; | |
| Rxposf = width * offsetR; | |
| Rxposi = (int)floorf(Rxposf); | |
| Rremainf = Rxposf - (float)Rxposi; | |
| Rtablepos = (int)(Rremainf*(float)SUBPIXEL); | |
| Gxposf = width * offsetG; | |
| Gxposi = (int)floorf(Gxposf); | |
| Gremainf = Gxposf - (float)Gxposi; | |
| Gtablepos = (int)(Gremainf*(float)SUBPIXEL); | |
| Bxposf = width * offsetB; | |
| Bxposi = (int)floorf(Bxposf); | |
| Bremainf = Bxposf - (float)Bxposi; | |
| Btablepos = (int)(Bremainf*(float)SUBPIXEL); | |
| Rxposi = abs(Rxposi); | |
| Gxposi = abs(Gxposi); | |
| Bxposi = abs(Bxposi); | |
| if(Rxposi==0 && Rtablepos == 0) | |
| return; // no move required | |
| RgainA = gains[Rtablepos][0]; | |
| RgainB = gains[Rtablepos][1]; | |
| RgainC = gains[Rtablepos][2]; | |
| RgainD = gains[Rtablepos][3]; | |
| GgainA = gains[Gtablepos][0]; | |
| GgainB = gains[Gtablepos][1]; | |
| GgainC = gains[Gtablepos][2]; | |
| GgainD = gains[Gtablepos][3]; | |
| BgainA = gains[Btablepos][0]; | |
| BgainB = gains[Btablepos][1]; | |
| BgainC = gains[Btablepos][2]; | |
| BgainD = gains[Btablepos][3]; | |
| if(negR == 0) | |
| { | |
| unsigned short *ptr = scanline; | |
| int nwidth = width-Rxposi+16; | |
| if(nwidth > width) | |
| nwidth = width; | |
| for(x=0;x<Rxposi+2;x++) | |
| { | |
| *ptr++ = 0;//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<nwidth;x++) | |
| { | |
| *ptr++ = RGB48[x*3];//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<16;x++) | |
| { | |
| *ptr++ = 0;//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| } | |
| else | |
| { | |
| unsigned short *ptr = scanline; | |
| for(x=0;x<2;x++) | |
| { | |
| if(x+Rxposi-2>=0) | |
| { | |
| *ptr++ = RGB48[(x+Rxposi-2)*3];//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| else | |
| { | |
| *ptr++ = 0;//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| } | |
| //memcpy(ptr, &RGB48[xposi*3], (width-xposi)*3*2); | |
| //ptr += (width-xposi)*3; | |
| for(x=Rxposi;x<width;x++) | |
| { | |
| *ptr++ = RGB48[x*3];//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<Rxposi+16;x++) | |
| { | |
| *ptr++ = 0;//r | |
| ptr++;//g | |
| ptr++;//b | |
| } | |
| } | |
| if(negG == 0) | |
| { | |
| unsigned short *ptr = scanline; | |
| int nwidth = width-Gxposi+16; | |
| if(nwidth > width) | |
| nwidth = width; | |
| for(x=0;x<Gxposi+2;x++) | |
| { | |
| ptr++;//r | |
| *ptr++ = 0;//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<nwidth;x++) | |
| { | |
| ptr++;//r | |
| *ptr++ = RGB48[x*3+1];//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<16;x++) | |
| { | |
| ptr++;//r | |
| *ptr++ = 0;//g | |
| ptr++;//b | |
| } | |
| } | |
| else | |
| { | |
| unsigned short *ptr = scanline; | |
| for(x=0;x<2;x++) | |
| { | |
| if(x+Gxposi-2>=0) | |
| { | |
| ptr++;//r | |
| *ptr++ = RGB48[(x+Gxposi-2)*3+1];//g | |
| ptr++;//b | |
| } | |
| else | |
| { | |
| ptr++;//r | |
| *ptr++ = 0;//g | |
| ptr++;//b | |
| } | |
| } | |
| //memcpy(ptr, &RGB48[xposi*3], (width-xposi)*3*2); | |
| //ptr += (width-xposi)*3; | |
| for(x=Gxposi;x<width;x++) | |
| { | |
| ptr++;//r | |
| *ptr++ = RGB48[x*3+1];//g | |
| ptr++;//b | |
| } | |
| for(x=0;x<Gxposi+16;x++) | |
| { | |
| ptr++;//r | |
| *ptr++ = 0;//g | |
| ptr++;//b | |
| } | |
| } | |
| if(negB == 0) | |
| { | |
| unsigned short *ptr = scanline; | |
| int nwidth = width-Bxposi+16; | |
| if(nwidth > width) | |
| nwidth = width; | |
| for(x=0;x<Bxposi+2;x++) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = 0;//b | |
| } | |
| for(x=0;x<nwidth;x++) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = RGB48[x*3+2];//b | |
| } | |
| for(x=0;x<16;x++) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = 0;//b | |
| } | |
| } | |
| else | |
| { | |
| unsigned short *ptr = scanline; | |
| for(x=0;x<2;x++) | |
| { | |
| if(x+Bxposi-2>=0) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = RGB48[(x+Bxposi-2)*3+2];//b | |
| } | |
| else | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = 0;//b | |
| } | |
| } | |
| //memcpy(ptr, &RGB48[xposi*3], (width-xposi)*3*2); | |
| //ptr += (width-xposi)*3; | |
| for(x=Bxposi;x<width;x++) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = RGB48[x*3+2];//b | |
| } | |
| for(x=0;x<Bxposi+16;x++) | |
| { | |
| ptr++;//r | |
| ptr++;//g | |
| *ptr++ = 0;//b | |
| } | |
| } | |
| gA1 = _mm_set_epi16(RgainA,GgainA,BgainA,RgainA,GgainA,BgainA,RgainA,GgainA); | |
| gA2 = _mm_set_epi16(BgainA,RgainA,GgainA,BgainA,RgainA,GgainA,BgainA,RgainA); | |
| gA3 = _mm_set_epi16(GgainA,BgainA,RgainA,GgainA,BgainA,RgainA,GgainA,BgainA); | |
| gB1 = _mm_set_epi16(RgainB,GgainB,BgainB,RgainB,GgainB,BgainB,RgainB,GgainB); | |
| gB2 = _mm_set_epi16(BgainB,RgainB,GgainB,BgainB,RgainB,GgainB,BgainB,RgainB); | |
| gB3 = _mm_set_epi16(GgainB,BgainB,RgainB,GgainB,BgainB,RgainB,GgainB,BgainB); | |
| gC1 = _mm_set_epi16(RgainC,GgainC,BgainC,RgainC,GgainC,BgainC,RgainC,GgainC); | |
| gC2 = _mm_set_epi16(BgainC,RgainC,GgainC,BgainC,RgainC,GgainC,BgainC,RgainC); | |
| gC3 = _mm_set_epi16(GgainC,BgainC,RgainC,GgainC,BgainC,RgainC,GgainC,BgainC); | |
| gD1 = _mm_set_epi16(RgainD,GgainD,BgainD,RgainD,GgainD,BgainD,RgainD,GgainD); | |
| gD2 = _mm_set_epi16(BgainD,RgainD,GgainD,BgainD,RgainD,GgainD,BgainD,RgainD); | |
| gD3 = _mm_set_epi16(GgainD,BgainD,RgainD,GgainD,BgainD,RgainD,GgainD,BgainD); | |
| line128 = (__m128i *)&scanline[0]; | |
| //outline128 = line128; | |
| outline128 = (__m128i *)&RGB48[0]; | |
| //l1 = load128;//r1,g1,b1,r2,g2,b2,r3,g3, | |
| //l2 = load128;//b3,r4,g4,b4,r5,g5,b5,r6 | |
| //l3 = load128;//g6,b6,r7,g7,b7,r8,g8,b8 | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13) | |
| { | |
| l1 = _mm_loadu_si128(line128++); | |
| l2 = _mm_loadu_si128(line128++); | |
| l3 = _mm_loadu_si128(line128++); | |
| shift = 0; | |
| } | |
| else | |
| { | |
| l1 = _mm_loadu_si128(line128++); | |
| l1 = _mm_srli_epi16(l1,3); //13-bit unsigned | |
| l2 = _mm_loadu_si128(line128++); | |
| l2 = _mm_srli_epi16(l2,3); //13-bit unsigned | |
| l3 = _mm_loadu_si128(line128++); | |
| l3 = _mm_srli_epi16(l3,3); //13-bit unsigned | |
| shift = 3; | |
| } | |
| for(x=0;x<width*3; x+=8) | |
| { | |
| //o=l1* gainA | |
| o128 = _mm_mulhi_epi16(l1, gA1); | |
| //t1 = l1<<3*16 //t1 = r2,g2,b2,r3,g3, 0 0 0 | |
| //t2 = l2>>16*5 //t2 = 0 0 0 0 0 b3,r4,g4 | |
| //t1 += t2; //t1 = r2,g2,b2,r3,g3,b3,r4,g4 | |
| //l1 = t1 //l1 = r2,g2,b2,r3,g3,b3,r4,g4 | |
| //t1 *= gainB | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,3*2); | |
| t2 = _mm_slli_si128(l2,5*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| l1 = t1; | |
| t1 = _mm_mulhi_epi16(t1, gB1); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| //t1 = l1<<3*16 //t1 = r3,g3,b3,r4,g4 0 0 0 | |
| //t2 = l2<<3*16; //t2 = b4,r5,g5,b5,r6 0 0 0 | |
| //t2 >>= 5*16; //t2 = 0 0 0 0 0 b4,r5,g5 | |
| //t1 += t2 //t1 = r3,g3,b3,r4,g4,b4,r5,g5 | |
| //l1 = t1 //l1 = r3,g3,b3,r4,g4,b4,r5,g5 | |
| //t1 *= gainC | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,3*2); | |
| t2 = _mm_srli_si128(l2,3*2); | |
| t2 = _mm_slli_si128(t2,5*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| l1 = t1; | |
| t1 = _mm_mulhi_epi16(t1, gC1); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| //t1 = l1<<3*16 //t1 = r4,g4,b4,r5,g5 0 0 0 | |
| //t2 = l2<<6*16 //t2 = b5,r6 0 0 0 0 0 0 | |
| //t2 >>= 5 * 16; //t2 = 0 0 0 0 0 b5,r6 0 | |
| //t1 += t2 //t1 = r4,g4,b4,r5,g5,b5,r6, 0 | |
| //t2 = l3>>7*16 //t2 = 0 0 0 0 0 0 0 g6 | |
| //t1 += t2 //t1 = r4,g4,b4,r5,g5,b5,r6,g6 | |
| //t1 *= gainD | |
| //o += t1 | |
| t1 = _mm_srli_si128(l1,3*2); | |
| t2 = _mm_srli_si128(l2,6*2); | |
| t2 = _mm_slli_si128(t2,5*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| t2 = _mm_slli_si128(l3,7*2); | |
| t1 = _mm_adds_epi16(t1,t2); | |
| t1 = _mm_mulhi_epi16(t1, gD1); | |
| o128 = _mm_adds_epi16(o128,t1); | |
| t1 = gA1; | |
| gA1 = gA2; | |
| gA2 = gA3; | |
| gA3 = t1; | |
| t1 = gB1; | |
| gB1 = gB2; | |
| gB2 = gB3; | |
| gB3 = t1; | |
| t1 = gC1; | |
| gC1 = gC2; | |
| gC2 = gC3; | |
| gC3 = t1; | |
| t1 = gD1; | |
| gD1 = gD2; | |
| gD2 = gD3; | |
| gD3 = t1; | |
| l1 = l2; | |
| l2 = l3; | |
| l3 = _mm_loadu_si128(line128++); | |
| if(shift) | |
| { | |
| l3 = _mm_srli_epi16(l3,3); //13-bit unsigned | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x0fff)); | |
| o128 = _mm_slli_epi16(o128,4); | |
| } | |
| else | |
| { | |
| // upper limit to 32767 | |
| o128 = _mm_adds_epi16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_subs_epu16(o128, _mm_set1_epi16(0x7fff - 0x3fff)); | |
| o128 = _mm_slli_epi16(o128,1); | |
| } | |
| _mm_storeu_si128(outline128++, o128); | |
| } | |
| } | |
| void HistogramLine(DECODER *decoder, unsigned short *sbase, int width, int format, int whitepoint) | |
| { | |
| int x,val,ypos=0,upos=1,vpos=3; | |
| int step = 1,pos=0; | |
| short *ssbase = (short *)sbase; | |
| uint32_t *lbase = (uint32_t *)sbase; | |
| ToolsHandle *tools = decoder->tools; | |
| int scaledvectorscope = 0; | |
| if(tools == NULL) | |
| return; | |
| if(whitepoint == 13) | |
| { | |
| if(format == DECODED_FORMAT_RG64) | |
| format = DECODED_FORMAT_W13A; | |
| else | |
| format = DECODED_FORMAT_WP13; | |
| } | |
| while(width/step > 360) | |
| { | |
| step*=2; | |
| } | |
| tools->waveformWidth = width/step; | |
| decoder->tools->blurUVdone = 0; | |
| switch(format & 0xffffff) | |
| { | |
| case DECODED_FORMAT_WP13: | |
| decoder->tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| R = ssbase[0]>>5; | |
| G = ssbase[1]>>5; | |
| B = ssbase[2]>>5; | |
| if(R > 255) R = 255; | |
| if(R < 0) R = 0; | |
| if(G > 255) G = 255; | |
| if(G < 0) G = 0; | |
| if(B > 255) B = 255; | |
| if(B < 0) B = 0; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| ssbase += step*3; | |
| } | |
| break; | |
| case DECODED_FORMAT_W13A: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| R = ssbase[0]>>5; | |
| G = ssbase[1]>>5; | |
| B = ssbase[2]>>5; | |
| if(R > 255) R = 255; | |
| if(R < 0) R = 0; | |
| if(G > 255) G = 255; | |
| if(G < 0) G = 0; | |
| if(B > 255) B = 255; | |
| if(B < 0) B = 0; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| ssbase += step*4; | |
| } | |
| break; | |
| case DECODED_FORMAT_RG48: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| R = sbase[0]>>8; | |
| G = sbase[1]>>8; | |
| B = sbase[2]>>8; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| sbase += step*3; | |
| } | |
| break; | |
| case DECODED_FORMAT_AB10: | |
| case DECODED_FORMAT_RG30: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| val = lbase[x]; | |
| R = (val>>22)&0xff; | |
| G = (val>>12)&0xff; | |
| B = (val>>02)&0xff; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case DECODED_FORMAT_AR10: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| val = lbase[x]; | |
| B = (val>>22)&0xff; | |
| G = (val>>12)&0xff; | |
| R = (val>>02)&0xff; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case DECODED_FORMAT_R210: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| val = SwapInt32BtoN(lbase[x]); | |
| R = (val>>22)&0xff; | |
| G = (val>>12)&0xff; | |
| B = (val>>02)&0xff; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case DECODED_FORMAT_DPX0: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| val = SwapInt32BtoN(lbase[x]); | |
| R = (val>>24)&0xff; | |
| G = (val>>14)&0xff; | |
| B = (val>>04)&0xff; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_B64A: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int32_t R,G,B,U,V; | |
| R = sbase[1]>>8; | |
| G = sbase[2]>>8; | |
| B = sbase[3]>>8; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| sbase += step*4; | |
| } | |
| break; | |
| case COLOR_FORMAT_UYVY: | |
| ypos=1,upos=0,vpos=2; | |
| case DECODED_FORMAT_CbYCrY_8bit: // CMD: 20100109 | |
| case COLOR_FORMAT_YUYV: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int Y,U,V,R,G,B; | |
| uint8_t *bptr = (uint8_t *)sbase; | |
| bptr += x * 2; | |
| Y = bptr[ypos]-16; | |
| U = bptr[upos]-128; | |
| Y+= bptr[ypos+2]-16; Y>>=1; | |
| V = bptr[vpos]-128; | |
| R = (9535*Y + 14688*V)>>13; //13-bit white | |
| G = (9535*Y - 4375*V - 1745*U)>>13; | |
| B = (9535*Y + 17326*U)>>13; | |
| //TODO much -20 to 120 RGB range. | |
| if(R > 255) R = 255; | |
| if(R < 0) R = 0; | |
| if(G > 255) G = 255; | |
| if(G < 0) G = 0; | |
| if(B > 255) B = 255; | |
| if(B < 0) B = 0; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| if(scaledvectorscope) | |
| { | |
| U *= 255; U /= 314; | |
| V *= 255; V /= 244; | |
| } | |
| //* 255.0/314.0 | |
| //* 255.0/244.0 | |
| U += 128; | |
| V += 128; | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case COLOR_FORMAT_YU64: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int Y,U,V,R,G,B; | |
| uint8_t *bptr = (uint8_t *)sbase; | |
| bptr += x * 4; | |
| bptr++; //read only the high byte out of the 16-bit | |
| Y = bptr[0]-16; | |
| V = bptr[2]-128; | |
| Y+= bptr[4]-16; Y>>=1; | |
| U = bptr[6]-128; | |
| R = (9535*Y + 14688*V)>>13; //13-bit white | |
| G = (9535*Y - 4375*V - 1745*U)>>13; | |
| B = (9535*Y + 17326*U)>>13; | |
| if(R > 255) R = 255; | |
| if(R < 0) R = 0; | |
| if(G > 255) G = 255; | |
| if(G < 0) G = 0; | |
| if(B > 255) B = 255; | |
| if(B < 0) B = 0; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| if(scaledvectorscope) | |
| { | |
| U *= 255; U /= 314; | |
| V *= 255; V /= 244; | |
| } | |
| U += 128; | |
| V += 128; | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case COLOR_FORMAT_V210: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int Y,U,V,R,G,B; | |
| uint32_t *lptr = (uint32_t *)sbase; | |
| lptr += (x/6)*4; | |
| switch(x % 6) | |
| { | |
| case 0: | |
| V = ((*lptr>>02) & 0xff) - 128; | |
| Y = ((*lptr>>12) & 0xff) - 16; | |
| U = ((*lptr>>22) & 0xff) - 128; | |
| lptr++; | |
| Y+= ((*lptr>>02) & 0xff) - 16; Y>>=1; | |
| break; | |
| case 1: | |
| lptr++; | |
| Y = ((*lptr>>02) & 0xff) - 16; | |
| V = ((*lptr>>12) & 0xff) - 128; | |
| Y+= ((*lptr>>22) & 0xff) - 16; Y>>=1; | |
| lptr--; | |
| U = ((*lptr>>22) & 0xff) - 128; | |
| break; | |
| case 2: | |
| lptr++; | |
| Y = ((*lptr>>22) & 0xff) - 16; | |
| lptr++; | |
| U = ((*lptr>>02) & 0xff) - 128; | |
| Y+= ((*lptr>>12) & 0xff) - 16; Y>>=1; | |
| V = ((*lptr>>22) & 0xff) - 128; | |
| break; | |
| case 3: | |
| lptr++; | |
| V = ((*lptr>>12) & 0xff) - 128; | |
| lptr++; | |
| U = ((*lptr>>02) & 0xff) - 128; | |
| Y = ((*lptr>>12) & 0xff) - 16; | |
| lptr++; | |
| Y+= ((*lptr>>02) & 0xff) - 16; Y>>=1; | |
| break; | |
| case 4: | |
| lptr+=2; | |
| V = ((*lptr>>22) & 0xff) - 128; | |
| lptr++; | |
| Y = ((*lptr>>02) & 0xff) - 16; | |
| U = ((*lptr>>12) & 0xff) - 128; | |
| Y+= ((*lptr>>22) & 0xff) - 16; Y>>=1; | |
| break; | |
| case 5: | |
| lptr+=2; | |
| V = ((*lptr>>22) & 0xff) - 128; | |
| lptr++; | |
| U = ((*lptr>>12) & 0xff) - 128; | |
| Y = ((*lptr>>22) & 0xff) - 16; | |
| lptr++; | |
| Y+= ((*lptr>>02) & 0xff) - 16; Y>>=1; | |
| break; | |
| } | |
| R = (9535*Y + 14688*V)>>13; //13-bit white | |
| G = (9535*Y - 4375*V - 1745*U)>>13; | |
| B = (9535*Y + 17326*U)>>13; | |
| if(R > 255) R = 255; | |
| if(R < 0) R = 0; | |
| if(G > 255) G = 255; | |
| if(G < 0) G = 0; | |
| if(B > 255) B = 255; | |
| if(B < 0) B = 0; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| if(scaledvectorscope) | |
| { | |
| U *= 255; U /= 314; | |
| V *= 255; V /= 244; | |
| } | |
| U += 128; | |
| V += 128; | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case COLOR_FORMAT_RGB24: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int R,G,B,U,V; | |
| uint8_t *bptr = (uint8_t *)sbase; | |
| bptr += x * 3; | |
| R = bptr[2]; | |
| G = bptr[1]; | |
| B = bptr[0]; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case COLOR_FORMAT_RGB32: | |
| tools->histogram = 1; | |
| for(x=0,pos=0; x<width; x+=step,pos++) | |
| { | |
| int R,G,B,U,V; | |
| uint8_t *bptr = (uint8_t *)sbase; | |
| bptr += x * 4; | |
| R = bptr[2]; | |
| G = bptr[1]; | |
| B = bptr[0]; | |
| tools->histR[R]++; | |
| tools->histG[G]++; | |
| tools->histB[B]++; | |
| tools->waveR[pos][R]++; | |
| tools->waveG[pos][G]++; | |
| tools->waveB[pos][B]++; | |
| //Y = (((1499 * R) + (5030 * G) + (508 * B))>>13) + 16; | |
| if(scaledvectorscope) | |
| { | |
| U = ((((-672* R) - (2249 * G) + (2920* B))>>13)) + 128; //* 255.0/314.0 | |
| V = ((((3758* R) - (3416 * G) - (343 * B))>>13)) + 128; //* 255.0/244.0 | |
| } | |
| else | |
| { | |
| U = ((((-827* R) - (2769 * G) + (3596* B))>>13)) + 128; | |
| V = ((((3596* R) - (3269 * G) - (328 * B))>>13)) + 128; | |
| } | |
| if(U<0) U=0; if(U>255) U=255; | |
| if(V<0) V=0; if(V>255) V=255; | |
| tools->scopeUV[U][V]++; | |
| } | |
| break; | |
| case COLOR_FORMAT_BYR2: | |
| case COLOR_FORMAT_BYR4: | |
| //do nothing | |
| break; | |
| default: | |
| assert(0); | |
| #if (0 && DEBUG) | |
| fprintf(stderr,"decoder.HistogramLine: Unsupported pixel format\n"); | |
| #endif | |
| break; | |
| } | |
| } | |
/*!
 * @brief Reduce stereo crosstalk ("ghosting") between a left and right eye
 * scanline by subtracting a fraction of the opposite eye's image.
 *
 * Each 16-bit RGB component is reduced to 10 bits and squared (an
 * approximation of linear light), then blended per the formula
 *   out^2 = in^2*(1023-leak) + leak*white^2 - other^2*leak  (all /1024)
 * before converting back via an integer square root.  ileakL/ileakR are
 * 16-bit leak amounts, reduced to 10-bit precision here; separate values
 * allow asymmetric correction per eye.  Operates in place on both scanlines.
 */
void GhostBust(DECODER *decoder, unsigned short *sbaseL, unsigned short *sbaseR, int width, int ileakL, int ileakR)
{
#if 1
	int x,RL,GL,BL,RR,GR,BR;
	int nRL,nGL,nBL;
	int nRR,nGR,nBR;
	int max = 1024*1024-1; // largest 20-bit squared value (white^2)
	unsigned short *sqrttable = decoder->sqrttable;
	// reduce the 16-bit leak fractions to 10 bits to keep products in 30 bits
	ileakL>>=6;
	ileakR>>=6;
	if(sqrttable == NULL)
		return;
	for(x=0;x<width;x++)
	{
		RL = sbaseL[0]>>6;
		GL = sbaseL[1]>>6; //10-bit
		BL = sbaseL[2]>>6;
		RL*=RL;
		GL*=GL; //20-bit
		BL*=BL;
		RR = sbaseR[0]>>6;
		GR = sbaseR[1]>>6; //10-bit
		BR = sbaseR[2]>>6;
		RR*=RR;
		GR*=GR; //20-bit
		BR*=BR;
		// left eye: blend toward white, subtract the right eye's leak
		nRL = RL*(1023-ileakL) + ileakL*max - RR*ileakL; //30-bit
		nGL = GL*(1023-ileakL) + ileakL*max - GR*ileakL;
		nBL = BL*(1023-ileakL) + ileakL*max - BR*ileakL;
		nRL >>= 10; //20-bit
		nGL >>= 10;
		nBL >>= 10;
		if(nRL>max) nRL=max; if(nRL<0) nRL=0;
		if(nGL>max) nGL=max; if(nGL<0) nGL=0;
		if(nBL>max) nBL=max; if(nBL<0) nBL=0;
		// lazily memoized integer square root; 65535 marks an unfilled slot
		// (real results never exceed 1023, so the sentinel is unambiguous)
		if(sqrttable[nRL] == 65535)
			sqrttable[nRL] = (int)sqrt(nRL);
		if(sqrttable[nGL] == 65535)
			sqrttable[nGL] = (int)sqrt(nGL);
		if(sqrttable[nBL] == 65535)
			sqrttable[nBL] = (int)sqrt(nBL);
		sbaseL[0] = sqrttable[nRL]<<6;
		sbaseL[1] = sqrttable[nGL]<<6;
		sbaseL[2] = sqrttable[nBL]<<6;
		sbaseL += 3;
		// right eye uses the ORIGINAL left values saved above, so the order
		// of these two half-iterations matters
		nRR = RR*(1023-ileakR) + ileakR*max - RL*ileakR; //30-bit
		nGR = GR*(1023-ileakR) + ileakR*max - GL*ileakR;
		nBR = BR*(1023-ileakR) + ileakR*max - BL*ileakR;
		nRR >>= 10; //20-bit
		nGR >>= 10;
		nBR >>= 10;
		if(nRR>max) nRR=max; if(nRR<0) nRR=0;
		if(nGR>max) nGR=max; if(nGR<0) nGR=0;
		if(nBR>max) nBR=max; if(nBR<0) nBR=0;
		if(sqrttable[nRR] == 65535)
			sqrttable[nRR] = (int)sqrt(nRR);
		if(sqrttable[nGR] == 65535)
			sqrttable[nGR] = (int)sqrt(nGR);
		if(sqrttable[nBR] == 65535)
			sqrttable[nBR] = (int)sqrt(nBR);
		sbaseR[0] = sqrttable[nRR]<<6;
		sbaseR[1] = sqrttable[nGR]<<6;
		sbaseR[2] = sqrttable[nBR]<<6;
		sbaseR += 3;
	}
#else // works and fast but has not image linearization, not as good
	// NOTE(review): this disabled SSE2 variant references 'ileak', which is
	// not a parameter of the current signature -- it would not compile if
	// re-enabled without being updated to ileakL/ileakR.
	__m128i *ptrL = (__m128i *)sbaseL;
	__m128i *ptrR = (__m128i *)sbaseR;
	__m128i t,L,R,nL,nR;
	int x,width8 = (width*3) & ~7;
	__m128i white_epi16 = _mm_set1_epi16(32767);
	__m128i leak_epi16 = _mm_set1_epi16(ileak>>1);
	__m128i oneNegLeak_epi16 = _mm_set1_epi16(32767-(ileak>>1));
	for(x=0;x<width8;x+=8)
	{
		L = _mm_load_si128(ptrL);
		R = _mm_load_si128(ptrR);
		L = _mm_srli_epi16(L,1); //15-bit
		R = _mm_srli_epi16(R,1); //15-bit
		nL = _mm_mulhi_epi16(L, oneNegLeak_epi16);
		t = _mm_mulhi_epi16(white_epi16, leak_epi16);
		nL = _mm_adds_epi16(nL, t);
		t = _mm_mulhi_epi16(R, leak_epi16);
		nL = _mm_subs_epu16(nL, t);
		nR = _mm_mulhi_epi16(R, oneNegLeak_epi16);
		t = _mm_mulhi_epi16(white_epi16, leak_epi16);
		nR = _mm_adds_epi16(nR, t);
		t = _mm_mulhi_epi16(L, leak_epi16);
		nR = _mm_subs_epu16(nR, t);
		L = _mm_slli_epi16(nL,2);
		R = _mm_slli_epi16(nR,2);
		_mm_store_si128(ptrL++, L);
		_mm_store_si128(ptrR++, R);
	}
#endif
}
/*!
 * @brief Ghost-busting for a single combined scanline where the eyes share
 * one RGB pixel (presumably red/cyan anaglyph -- R is one eye, G+B the
 * other; TODO confirm against callers).
 *
 * Same linearize-square / blend / square-root scheme as GhostBust(): R has
 * the average of G and B subtracted with leak ileakL, while G and B each
 * have R subtracted with leak ileakR.  Operates in place.  Two disabled
 * reference implementations (float and SSE float) follow the active branch.
 */
void GhostBustRC(DECODER *decoder, unsigned short *sbase, int width, int ileakL, int ileakR)
{
#if 1
	int x,R,G,B;
	int nR,nG,nB;
	int max = 1024*1024-1; // largest 20-bit squared value (white^2)
	unsigned short *sqrttable = decoder->sqrttable;
	// reduce the 16-bit leak fractions to 10 bits
	ileakL>>=6;
	ileakR>>=6;
	if(sqrttable == NULL)
		return;
	for(x=0;x<width;x++)
	{
		R = sbase[0]>>6;
		G = sbase[1]>>6; //10-bit
		B = sbase[2]>>6;
		R*=R;
		G*=G; //20-bit
		B*=B;
		// red channel subtracts the cyan (G+B mean) leak; cyan channels
		// subtract the red leak
		nR = R*(1023-ileakL) + ileakL*max - ((G+B)>>1)*ileakL; //30-bit
		nG = G*(1023-ileakR) + ileakR*max - R*ileakR;
		nB = B*(1023-ileakR) + ileakR*max - R*ileakR;
		nR >>= 10; //20-bit
		nG >>= 10;
		nB >>= 10;
		if(nR>max) nR=max; if(nR<0) nR=0;
		if(nG>max) nG=max; if(nG<0) nG=0;
		if(nB>max) nB=max; if(nB<0) nB=0;
		// lazily memoized integer square root; 65535 marks an unfilled slot
		if(sqrttable[nR] == 65535)
			sqrttable[nR] = (int)sqrt(nR);
		if(sqrttable[nG] == 65535)
			sqrttable[nG] = (int)sqrt(nG);
		if(sqrttable[nB] == 65535)
			sqrttable[nB] = (int)sqrt(nB);
		sbase[0] = sqrttable[nR]<<6;
		sbase[1] = sqrttable[nG]<<6;
		sbase[2] = sqrttable[nB]<<6;
		sbase += 3;
	}
#elif 0
	// Disabled float reference implementation of the same math.
	int x;
	float R,G,B;
	float nR,nG,nB;
	float fleakL = (float)ileakL / 65535.0;
	float fleakR = (float)ileakR / 65535.0;
	for(x=0;x<width;x++)
	{
		R = sbase[0];
		G = sbase[1];
		B = sbase[2];
		R /= 65535.0;
		G /= 65535.0;
		B /= 65535.0;
		R *= R;
		G *= G;
		B *= B;
		nR = R*(1.0-fleakL) + fleakL - (G+B)*0.5*fleakL;
		nG = G*(1.0-fleakR) + fleakR - R*fleakR;
		nB = B*(1.0-fleakR) + fleakR - R*fleakR;
		if(nR<0) nR=0;
		if(nG<0) nG=0;
		if(nB<0) nB=0;
		nR = sqrt(nR);
		nG = sqrt(nG);
		nB = sqrt(nB);
		sbase[0] = nR * 65535.0;
		sbase[1] = nG * 65535.0;
		sbase[2] = nB * 65535.0;
		sbase += 3;
	}
#elif 0
	// Disabled SSE float implementation (two RGB pixels per iteration).
	__m128i RGBRGB,rgb_epi32,RGB1,RGB2;
	__m128i zero_epi128 = _mm_setzero_si128();
	int x,width6 = (width*3) / 6 * 6;
	__m128 white_ps = _mm_set1_ps(1.0);
	__m128 mul_neg_leak_ps = _mm_set_ps(1.0 - ((float)ileakL/65536.0), 1.0 - ((float)ileakR/65536.0), 1.0 - ((float)ileakR/65536.0), 1.0 - ((float)ileakL/65536.0));
	__m128 leak_ps = _mm_set_ps((float)ileakL/65536.0, (float)ileakR/65536.0, (float)ileakR/65536.0, (float)ileakL/65536.0);
	__m128 scale_ps = _mm_set1_ps(65535.0);
	__m128 scalehalf_ps = _mm_set1_ps(32767.0);
	__m128 zero_ps = _mm_set1_ps(0.0);
	__m128 rgb_ps, alt_rgb_ps;
	__m128i sub_epi32;
	__m128 sub_ps;
	for(x=0;x<width6;x+=6) // two RGB pairs
	{
		int R,G,B;
		RGBRGB = _mm_loadu_si128((__m128i *)sbase);
		R = _mm_extract_epi16(RGBRGB, 0);
		G = _mm_extract_epi16(RGBRGB, 1);
		B = _mm_extract_epi16(RGBRGB, 2);
		G+=B;
		G>>=1;
		sub_epi32 = _mm_set_epi32(G,R,R,G);
		sub_ps = _mm_cvtepi32_ps(sub_epi32); // range 0 to 65535.0
		sub_ps = _mm_div_ps(sub_ps, scale_ps); // range 0 to 1.0
		sub_ps = _mm_mul_ps(sub_ps, sub_ps); // square
		rgb_epi32 = _mm_unpacklo_epi16(RGBRGB, zero_epi128);
		rgb_ps = _mm_cvtepi32_ps(rgb_epi32); // range 0 to 65535.0
		rgb_ps = _mm_div_ps(rgb_ps, scale_ps); // range 0 to 1.0
		rgb_ps = _mm_mul_ps(rgb_ps, rgb_ps); // square
		rgb_ps = _mm_mul_ps(rgb_ps, mul_neg_leak_ps); // [R*(1.0-fleakL)] + fleakL - (G+B)*0.5*fleakL;
		rgb_ps = _mm_add_ps(rgb_ps, leak_ps); // R*(1.0-fleakL) [+ fleakL] - (G+B)*0.5*fleakL;
		sub_ps = _mm_mul_ps(sub_ps, leak_ps); // R*(1.0-fleakL) + fleakL - [(G+B)*0.5*fleakL;]
		rgb_ps = _mm_sub_ps(rgb_ps, sub_ps); // R*(1.0-fleakL) + fleakL] [- (G+B)*0.5*fleakL;]
		rgb_ps = _mm_max_ps(rgb_ps, zero_ps); // if(x < 0) x= 0;
		rgb_ps = _mm_sqrt_ps(rgb_ps); // sqrt()
		rgb_ps = _mm_mul_ps(rgb_ps, scalehalf_ps); // range 0 to 32767
		RGB1 = _mm_cvtps_epi32(rgb_ps);
		RGB1 = _mm_packs_epi32 (RGB1, zero_epi128);
		RGB1 = _mm_slli_si128(RGB1, 10);
		RGB1 = _mm_srli_si128(RGB1, 10);
		RGBRGB = _mm_srli_si128(RGBRGB, 6);
		R = _mm_extract_epi16(RGBRGB, 0);
		G = _mm_extract_epi16(RGBRGB, 1);
		B = _mm_extract_epi16(RGBRGB, 2);
		G+=B;
		G>>=1;
		sub_epi32 = _mm_set_epi32(G,R,R,G);
		sub_ps = _mm_cvtepi32_ps(sub_epi32); // range 0 to 65535.0
		sub_ps = _mm_div_ps(sub_ps, scale_ps); // range 0 to 1.0
		sub_ps = _mm_mul_ps(sub_ps, sub_ps); // square
		rgb_epi32 = _mm_unpacklo_epi16(RGBRGB, zero_epi128);
		rgb_ps = _mm_cvtepi32_ps(rgb_epi32); // range 0 to 65535.0
		rgb_ps = _mm_div_ps(rgb_ps, scale_ps); // range 0 to 1.0
		rgb_ps = _mm_mul_ps(rgb_ps, rgb_ps); // square
		rgb_ps = _mm_mul_ps(rgb_ps, mul_neg_leak_ps); // [R*(1.0-fleakL)] + fleakL - (G+B)*0.5*fleakL;
		rgb_ps = _mm_add_ps(rgb_ps, leak_ps); // R*(1.0-fleakL) [+ fleakL] - (G+B)*0.5*fleakL;
		sub_ps = _mm_mul_ps(sub_ps, leak_ps); // R*(1.0-fleakL) + fleakL - [(G+B)*0.5*fleakL;]
		rgb_ps = _mm_sub_ps(rgb_ps, sub_ps); // R*(1.0-fleakL) + fleakL] [- (G+B)*0.5*fleakL;]
		rgb_ps = _mm_max_ps(rgb_ps, zero_ps); // if(x < 0) x= 0;
		rgb_ps = _mm_sqrt_ps(rgb_ps); // sqrt()
		rgb_ps = _mm_mul_ps(rgb_ps, scalehalf_ps); // range 0 to 32767
		RGB2 = _mm_cvtps_epi32(rgb_ps);
		RGB2 = _mm_packs_epi32 (RGB2, zero_epi128);
		RGB2 = _mm_slli_si128(RGB2, 6);
		RGB1 = _mm_adds_epi16(RGB1, RGB2);
		RGB1 = _mm_slli_epi16(RGB1, 1);
		RGB1 = _mm_slli_si128(RGB1, 4);
		RGB1 = _mm_srli_si128(RGB1, 4);
		RGBRGB = _mm_srli_si128(RGBRGB, 6);
		RGBRGB = _mm_slli_si128(RGBRGB, 12);
		RGBRGB = _mm_adds_epi16(RGB1, RGBRGB);
		_mm_storeu_si128((__m128i *)sbase, RGBRGB);
		sbase += 6;
	}
#endif
}
| void GhostBustAB(DECODER *decoder, unsigned short *sbase, int width, int ileakL, int ileakR) | |
| { | |
| int x,R,G,B; | |
| int nR,nG,nB; | |
| int max = 1024*1024-1; | |
| unsigned short *sqrttable = decoder->sqrttable; | |
| ileakL>>=6; | |
| ileakR>>=6; | |
| if(sqrttable == NULL) | |
| return; | |
| for(x=0;x<width;x++) | |
| { | |
| R = sbase[0]>>6; | |
| G = sbase[1]>>6; //10-bit | |
| B = sbase[2]>>6; | |
| R*=R; | |
| G*=G; //20-bit | |
| B*=B; | |
| nR = R*(1023-ileakL) + ileakL*max - B*ileakL; | |
| nG = G*(1023-ileakL) + ileakL*max - B*ileakL; | |
| nB = B*(1023-ileakR) + ileakR*max - ((R+G)>>1)*ileakR; | |
| nR >>= 10; //20-bit | |
| nG >>= 10; | |
| nB >>= 10; | |
| if(nR>max) nR=max; if(nR<0) nR=0; | |
| if(nG>max) nG=max; if(nG<0) nG=0; | |
| if(nB>max) nB=max; if(nB<0) nB=0; | |
| if(sqrttable[nR] == 65535) | |
| sqrttable[nR] = (int)sqrt(nR); | |
| if(sqrttable[nG] == 65535) | |
| sqrttable[nG] = (int)sqrt(nG); | |
| if(sqrttable[nB] == 65535) | |
| sqrttable[nB] = (int)sqrt(nB); | |
| sbase[0] = sqrttable[nR]<<6; | |
| sbase[1] = sqrttable[nG]<<6; | |
| sbase[2] = sqrttable[nB]<<6; | |
| sbase += 3; | |
| } | |
| } | |
| void GhostBustGM(DECODER *decoder, unsigned short *sbase, int width, int ileakL, int ileakR) | |
| { | |
| int x,R,G,B; | |
| int nR,nG,nB; | |
| int max = 1024*1024-1; | |
| unsigned short *sqrttable = decoder->sqrttable; | |
| ileakL>>=6; | |
| ileakR>>=6; | |
| if(sqrttable == NULL) | |
| return; | |
| for(x=0;x<width;x++) | |
| { | |
| R = sbase[0]>>6; | |
| G = sbase[1]>>6; //10-bit | |
| B = sbase[2]>>6; | |
| R*=R; | |
| G*=G; //20-bit | |
| B*=B; | |
| nR = R*(1023-ileakL) + ileakL*max - G*ileakL; | |
| nG = G*(1023-ileakR) + ileakR*max - ((R+B)>>1)*ileakR; | |
| nB = B*(1023-ileakL) + ileakL*max - G*ileakL; | |
| nR >>= 10; //20-bit | |
| nG >>= 10; | |
| nB >>= 10; | |
| if(nR>max) nR=max; if(nR<0) nR=0; | |
| if(nG>max) nG=max; if(nG<0) nG=0; | |
| if(nB>max) nB=max; if(nB<0) nB=0; | |
| if(sqrttable[nR] == 65535) | |
| sqrttable[nR] = (int)sqrt(nR); | |
| if(sqrttable[nG] == 65535) | |
| sqrttable[nG] = (int)sqrt(nG); | |
| if(sqrttable[nB] == 65535) | |
| sqrttable[nB] = (int)sqrt(nB); | |
| sbase[0] = sqrttable[nR]<<6; | |
| sbase[1] = sqrttable[nG]<<6; | |
| sbase[2] = sqrttable[nB]<<6; | |
| sbase += 3; | |
| } | |
| } | |
| void ProcessLine3D(DECODER *decoder, uint8_t *buffer, int bufferremain, uint8_t *output, int pitch, uint8_t *source_buffer, int source_pitch, int channel_offset, int y, int blank) | |
| { | |
| uint16_t *scratchline,*scratchline2,*scratchline3; | |
| uint16_t *sptr; | |
| uint16_t *srclineA,*srclineB; | |
| uint16_t *dstlineA,*dstlineB; | |
| int x,y2; | |
| int width = decoder->frame.width; | |
| int height = decoder->frame.height; | |
| int skip = 3; | |
| int sskip = 3; | |
| uint8_t *bptr1; | |
| uint8_t *bptr2; | |
| uint8_t *baseptr1; | |
| uint8_t *baseptr2; | |
| float windowMaskL = decoder->cfhddata.channel[0].FloatingWindowMaskL; | |
| float windowMaskR = decoder->cfhddata.channel[0].FloatingWindowMaskR; | |
| float frameTilt = decoder->cfhddata.channel[0].FrameTilt; | |
| float horizOffset = decoder->cfhddata.channel[1].HorizontalOffset; | |
| float horizOffsetR = decoder->cfhddata.channel[2].HorizontalOffset; | |
| float rotOffset = decoder->cfhddata.channel[1].RotationOffset; | |
| float rotOffsetR = decoder->cfhddata.channel[2].RotationOffset; | |
| float horizOffsetStep = 0; | |
| float horizOffsetStepR = 0; | |
| int flip1=0,flip2=0; | |
| int channel_flip = decoder->cfhddata.channel_flip; | |
| int source_pitch1 = source_pitch; | |
| int source_pitch2 = source_pitch; | |
| uint8_t *outputline = output+y*pitch; | |
| uint8_t *outputline2 = NULL; | |
| float horizOffsetBase; | |
| float rotOffsetBase; | |
| float horizOffsetBaseR; | |
| float rotOffsetBaseR; | |
| int formatdone = 0; | |
| float xmin = decoder->cfhddata.channel[0].FrameMask.topLftX; | |
| float xmax = decoder->cfhddata.channel[0].FrameMask.topRgtX; | |
| //float ymin = decoder->cfhddata.channel[0].FrameMask.topLftY; | |
| float ymax = decoder->cfhddata.channel[0].FrameMask.botLftY; | |
| float zoom; | |
| float zoomR; | |
| float frameZoom1 = decoder->cfhddata.channel[1].FrameZoom; | |
| float frameZoom2 = decoder->cfhddata.channel[2].FrameZoom; | |
| float frameAutoZoom = decoder->cfhddata.channel[0].FrameAutoZoom; | |
| float frameDiffZoom1 = decoder->cfhddata.channel[1].FrameDiffZoom; | |
| float frameDiffZoom2 = decoder->cfhddata.channel[2].FrameDiffZoom; | |
| float frameHDynamic = decoder->cfhddata.FrameHDynamic; | |
| float frameHDynCenter = decoder->cfhddata.FrameHDynCenter; | |
| float frameHDynWidth = decoder->cfhddata.FrameHDynWidth; | |
| float frameHScale = decoder->cfhddata.FrameHScale; | |
| int alphachannel = 0; | |
| int whitepoint = 16; | |
| float blursharpenL = decoder->cfhddata.channel[1].user_blur_sharpen; | |
| float blursharpenR = decoder->cfhddata.channel[2].user_blur_sharpen; | |
| float vignette = decoder->cfhddata.channel[0].user_vignette_start; | |
| int flip_LR = 0; | |
| float vig_r1; | |
| float vig_r2; | |
| float vig_gain; | |
| if(blank) // blankline, no shifts required | |
| { | |
| windowMaskL = 0; | |
| windowMaskR = 0; | |
| frameTilt = 0; | |
| horizOffset = 0; | |
| horizOffsetR = 0; | |
| rotOffset = 0; | |
| rotOffsetR = 0; | |
| frameZoom1 = 1.0; | |
| frameZoom2 = 1.0; | |
| frameAutoZoom = 1.0; | |
| frameDiffZoom1 = 1.0; | |
| frameDiffZoom2 = 1.0; | |
| frameHScale = 1.0; | |
| frameHDynamic = 1.0; | |
| frameHDynCenter = 0.5; | |
| frameHDynWidth = 0.0; | |
| } | |
| if( decoder->StereoBufferFormat == DECODED_FORMAT_RG64 || | |
| decoder->StereoBufferFormat == DECODED_FORMAT_W13A || | |
| decoder->StereoBufferFormat == DECODED_FORMAT_RGB32) | |
| alphachannel = 1; | |
| if(xmax == 0.0) xmax = 1.0; | |
| if(ymax == 0.0) ymax = 1.0; | |
| if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| { | |
| width *= 2; | |
| } | |
| if(decoder->source_channels < 2) // 2D | |
| { | |
| channel_flip &= 0x3; | |
| channel_flip |= channel_flip<<2; | |
| decoder->cfhddata.channel_flip = channel_flip; | |
| } | |
| if(!(decoder->cfhddata.process_path_flags & PROCESSING_COLORMATRIX) || | |
| decoder->frame.resolution == DECODED_RESOLUTION_QUARTER || | |
| decoder->frame.resolution == DECODED_RESOLUTION_LOWPASS_ONLY || | |
| decoder->frame.resolution == DECODED_RESOLUTION_QUARTER_NODEBAYER_SCALED) | |
| { | |
| blursharpenL = 0.0; | |
| blursharpenR = 0.0; | |
| } | |
| if(!(decoder->cfhddata.process_path_flags & PROCESSING_ORIENTATION)) | |
| { | |
| horizOffset = rotOffset = 0; | |
| horizOffsetR = rotOffsetR = 0; | |
| frameTilt = 0; | |
| frameAutoZoom = 1.0; | |
| frameDiffZoom1 = 1.0; | |
| frameDiffZoom2 = 1.0; | |
| } | |
| if(!(decoder->cfhddata.process_path_flags & PROCESSING_IMAGEFLIPS)) | |
| { | |
| channel_flip = 0; | |
| } | |
| if(decoder->cfhddata.process_path_flags & PROCESSING_FRAMING) | |
| { | |
| horizOffset += decoder->cfhddata.FrameOffsetX; | |
| horizOffsetR -= decoder->cfhddata.FrameOffsetX; | |
| frameZoom1 += frameHScale - 1.0f; | |
| frameZoom2 += frameHScale - 1.0f; | |
| if(frameHDynamic != 1.0) | |
| { | |
| frameZoom1 += 0.00001f; | |
| frameZoom2 += 0.00001f; | |
| } | |
| if(vignette != 0.0) | |
| { | |
| float vig_diag = sqrtf(1.0f + ((float)decoder->frame.height / (float) decoder->frame.width) * ((float)decoder->frame.height / (float) decoder->frame.width)); | |
| vig_r1 = (vignette+1.0f); | |
| vig_r2 = (decoder->cfhddata.channel[0].user_vignette_end+1.0f); | |
| vig_gain = decoder->cfhddata.channel[0].user_vignette_gain; | |
| vig_r1 *= vig_diag; | |
| vig_r2 *= vig_diag; | |
| } | |
| } | |
| else | |
| { | |
| frameZoom1 = 1.0f; | |
| frameZoom2 = 1.0f; | |
| vignette = 0; | |
| } | |
| zoom = frameZoom1 * frameAutoZoom * frameDiffZoom1; | |
| if(frameDiffZoom2 != 0.0) | |
| zoomR = frameZoom2 * frameAutoZoom / frameDiffZoom2; | |
| else | |
| zoomR = 0.0; | |
| if(decoder->cfhddata.process_path_flags & PROCESSING_FRAMING) | |
| { | |
| if(decoder->cfhddata.InvertOffset) | |
| { | |
| rotOffset = -rotOffset; | |
| rotOffsetR = -rotOffsetR; | |
| rotOffset -= decoder->cfhddata.FrameOffsetR; | |
| rotOffsetR -= -decoder->cfhddata.FrameOffsetR; | |
| } | |
| else | |
| { | |
| rotOffset += decoder->cfhddata.FrameOffsetR; | |
| rotOffsetR += -decoder->cfhddata.FrameOffsetR; | |
| } | |
| } | |
| rotOffsetBase = rotOffset; | |
| horizOffsetBase = horizOffset; | |
| rotOffsetBaseR = rotOffsetR; | |
| horizOffsetBaseR = horizOffsetR; | |
| horizOffset -= rotOffset * 0.5f; | |
| horizOffsetStep = rotOffset / (float)height; | |
| horizOffsetR -= rotOffsetR * 0.5f; | |
| horizOffsetStepR = rotOffsetR / (float)height; | |
| horizOffset += horizOffsetStep * y; | |
| horizOffsetR += horizOffsetStepR * y; | |
| assert(bufferremain >= width * 8 * 2 * 2); | |
| baseptr1 = source_buffer; | |
| baseptr2 = source_buffer + channel_offset; | |
| if(channel_flip & 0xf) | |
| { | |
| if(channel_flip & 1) | |
| { | |
| flip1 = 1; | |
| } | |
| if(channel_flip & 4) | |
| { | |
| flip2 = 1; | |
| } | |
| } | |
| if(source_pitch1 < 0) | |
| flip_LR = 1; | |
| decoder->sharpen_flip = 0; | |
| if(channel_flip & 2) //ProcessLine3D | |
| { | |
| if(decoder->channel_blend_type == BLEND_NONE && decoder->channel_current == 1) // right channel only (stored in baseptr1) | |
| { | |
| } | |
| else | |
| { | |
| baseptr1 += source_pitch1*(height-1); | |
| source_pitch1 = -source_pitch1; | |
| decoder->sharpen_flip = 1; | |
| } | |
| } | |
| if(channel_flip & 8) | |
| { | |
| if(decoder->channel_blend_type == BLEND_NONE && decoder->channel_current == 1) // right channel only (stored in baseptr1) | |
| { | |
| baseptr1 += source_pitch1*(height-1); | |
| source_pitch1 = -source_pitch1; | |
| decoder->sharpen_flip = 1; | |
| } | |
| else | |
| { | |
| baseptr2 += source_pitch2*(height-1); | |
| source_pitch2 = -source_pitch2; | |
| } | |
| } | |
| bptr1 = baseptr1 + y*source_pitch1; | |
| bptr2 = baseptr2 + y*source_pitch2; | |
| y2 = y; | |
| if(decoder->channel_blend_type == BLEND_FREEVIEW) //FreeView | |
| { | |
| if(y2 < height/4) | |
| { | |
| blank = 1; | |
| y2 = 0; | |
| } | |
| else | |
| { | |
| y2 -= height/4; | |
| y2 *= 2; | |
| if(y2 >= height-1) | |
| { | |
| blank = 1; | |
| y2 = height - 2; | |
| } | |
| } | |
| bptr1 = baseptr1 + y2*source_pitch1; | |
| bptr2 = baseptr2 + y2*source_pitch2; | |
| } | |
| srclineA = (uint16_t *)bptr1; | |
| srclineB = (uint16_t *)bptr2; | |
| scratchline = (uint16_t *)buffer; | |
| scratchline2 = (uint16_t *)(buffer + width * 6 + width) /* as we pad the line */ ;; | |
| scratchline3 = (uint16_t *)(buffer + width * 6*2 + width*2) /* as we pad the line */ ; | |
| if(alphachannel) | |
| { | |
| scratchline = (uint16_t *)buffer; | |
| scratchline2 = (uint16_t *)(buffer + width * 8 + width) /* as we pad the line */ ;; | |
| scratchline3 = (uint16_t *)(buffer + width * 8*2 + width*2) /* as we pad the line */ ; | |
| } | |
| dstlineA = sptr = scratchline; | |
| dstlineB = scratchline3; | |
| switch(decoder->StereoBufferFormat) | |
| { | |
| case DECODED_FORMAT_RG64: | |
| whitepoint = 16; | |
| skip = 8; | |
| sskip = 4; | |
| break; | |
| case DECODED_FORMAT_W13A: | |
| whitepoint = 13; | |
| skip = 8; | |
| sskip = 4; | |
| break; | |
| case DECODED_FORMAT_WP13: | |
| whitepoint = 13; | |
| skip = 6; | |
| sskip = 3; | |
| break; | |
| case DECODED_FORMAT_RG48: | |
| skip = 6; | |
| sskip = 3; | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| skip = 4; | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| skip = 3; | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| skip = 2; | |
| break; | |
| } | |
| if(blank) | |
| { | |
| if(srclineA) | |
| memset(srclineA, 0, width*skip); | |
| if(srclineB && decoder->channel_decodes > 1) | |
| memset(srclineB, 0, width*skip); | |
| } | |
| if(blursharpenL != 0.0 || blursharpenR != 0.0) | |
| { | |
| if(decoder->channel_blend_type == BLEND_FREEVIEW || | |
| decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC || | |
| decoder->channel_blend_type == BLEND_LINE_INTERLEAVED | |
| ) | |
| { | |
| decoder->doVerticalFilter = 0; | |
| } | |
| else | |
| { | |
| decoder->doVerticalFilter = 1; | |
| } | |
| } | |
| { | |
| switch(decoder->channel_blend_type) | |
| { | |
| case BLEND_FREEVIEW: | |
| case BLEND_SIDEBYSIDE_ANAMORPHIC: //side by side | |
| if(!blank) | |
| { | |
| if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL || decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) | |
| { | |
| dstlineA = srclineA; | |
| sptr = dstlineA; | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(!alphachannel) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width/2, -horizOffset, flip1); | |
| RGB48HoriShift(decoder, srclineB, scratchline2, width/2, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width/2, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline2, width/2, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGBA64HoriShift(decoder, srclineA, scratchline2, width/2, -horizOffset, flip1); | |
| RGBA64HoriShift(decoder, srclineB, scratchline2, width/2, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGBA64HoriShiftZoom(decoder, srclineA, scratchline2, width/2, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGBA64HoriShiftZoom(decoder, srclineB, scratchline2, width/2, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| int cwidth= width/2; | |
| if(decoder->channel_blend_type == BLEND_SIDEBYSIDE_ANAMORPHIC) | |
| cwidth= width; | |
| FastVignetteInplaceWP13(decoder, width/2, cwidth, height, y, vig_r1, vig_r2, vig_gain, | |
| (int16_t *)srclineA, decoder->frame.resolution, skip); | |
| FastVignetteInplaceWP13(decoder, width/2, cwidth, height, y, vig_r1, vig_r2, vig_gain, | |
| (int16_t *)srclineB, decoder->frame.resolution, skip); | |
| } | |
| if(blursharpenL != 0.0) FastSharpeningBlurHinplaceWP13(width/2, (int16_t *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| if(blursharpenR != 0.0) FastSharpeningBlurHinplaceWP13(width/2, (int16_t *)srclineB, blursharpenR, decoder->frame.resolution, skip); | |
| memcpy(dstlineA+sskip*(width/2), srclineB, width/2*sskip*2); | |
| } | |
| else | |
| { | |
| int16_t *ptr; | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(!alphachannel) | |
| { | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGB48HoriShift(decoder, srclineB, scratchline2, width, horizOffset, flip2); | |
| } | |
| else | |
| { | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| else | |
| { | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGBA64HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGBA64HoriShift(decoder, srclineB, scratchline2, width, horizOffset, flip2); | |
| } | |
| else | |
| { | |
| RGBA64HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGBA64HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| int cwidth= width/2; | |
| if(decoder->channel_blend_type == BLEND_SIDEBYSIDE_ANAMORPHIC) | |
| cwidth= width; | |
| FastVignetteInplaceWP13(decoder, width, cwidth, height, y, vig_r1, vig_r2, vig_gain, | |
| (int16_t *)srclineA, decoder->frame.resolution, skip); | |
| FastVignetteInplaceWP13(decoder, width, cwidth, height, y, vig_r1, vig_r2, vig_gain, | |
| (int16_t *)srclineB, decoder->frame.resolution, skip); | |
| } | |
| if(blursharpenL != 0.0) FastSharpeningBlurHinplaceWP13(width, (int16_t *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| if(blursharpenR != 0.0) FastSharpeningBlurHinplaceWP13(width, (int16_t *)srclineB, blursharpenR, decoder->frame.resolution, skip); | |
| dstlineA = srclineA; | |
| ptr = (int16_t *)srclineA; | |
| for(x=0; x<width/2; x++) | |
| { | |
| *ptr++ = (ptr1[0]+ptr1[3])>>1; | |
| *ptr++ = (ptr1[1]+ptr1[4])>>1; | |
| *ptr++ = (ptr1[2]+ptr1[5])>>1 ; | |
| ptr1+=sskip*2; | |
| } | |
| for(; x<width; x++) | |
| { | |
| *ptr++ = (ptr2[0]+ptr2[3])>>1; | |
| *ptr++ = (ptr2[1]+ptr2[4])>>1; | |
| *ptr++ = (ptr2[2]+ptr2[5])>>1; | |
| ptr2+=sskip*2; | |
| } | |
| } | |
| if(windowMaskL || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| RGB48WindowMask(decoder, dstlineA, width/2, 0, mask); | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, dstlineA, width/2, 0, windowMaskL); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, dstlineA, width/2, 1, xmin); | |
| } | |
| } | |
| if(windowMaskR || (1.0-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| RGB48WindowMask(decoder, dstlineA+width*sskip/2, width/2, 1, mask); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, dstlineA+width*sskip/2, width/2, 1, windowMaskR); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, dstlineA+width*sskip/2, width/2, 0, xmin); | |
| } | |
| } | |
| if(decoder->channel_swapped_flags & FLAG3D_GHOSTBUST) | |
| { | |
| if(decoder->ghost_bust_left || decoder->ghost_bust_right) | |
| { | |
| GhostBust(decoder, dstlineA, dstlineA+width*sskip/2, width/2, decoder->ghost_bust_left, decoder->ghost_bust_right); | |
| } | |
| } | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| memcpy(scratchline2+width*sskip/2, dstlineA, width*sskip*2/2); | |
| memcpy(dstlineA, dstlineA+width*sskip/2, width*sskip*2/2); | |
| memcpy(dstlineA+width*sskip/2, scratchline2+width*sskip/2, width*sskip*2/2); | |
| } | |
| } | |
| break; | |
| case BLEND_STACKED_ANAMORPHIC: //stacked | |
| case BLEND_LINE_INTERLEAVED: //fields | |
| if((y & 1) == 1) return; | |
| if(!blank) | |
| { | |
| uint16_t *ptrA1 = (uint16_t *)srclineA; | |
| uint16_t *ptrA2 = (uint16_t *)srclineA + (source_pitch1>>1); | |
| uint16_t *ptrB1 = (uint16_t *)srclineB; | |
| uint16_t *ptrB2 = (uint16_t *)srclineB + (source_pitch2>>1); | |
| FastBlendWP13((short *)ptrA1, (short *)ptrA2, (short *)ptrA1/*output*/, width*skip); | |
| FastBlendWP13((short *)ptrB1, (short *)ptrB2, (short *)ptrB1/*output*/, width*skip); | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffset || horizOffsetR || channel_flip || frameTilt) | |
| { | |
| if(!alphachannel) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGB48HoriShift(decoder, srclineB, scratchline2, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGBA64HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGBA64HoriShift(decoder, srclineB, scratchline2, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGBA64HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGBA64HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineA, decoder->frame.resolution, skip); | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineB, decoder->frame.resolution, skip); | |
| } | |
| if(blursharpenL != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| if(blursharpenR != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineB, blursharpenR, decoder->frame.resolution, skip); | |
| if(windowMaskL || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| RGB48WindowMask(decoder, srclineA, width, 0, mask); | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 0, windowMaskL); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineA, width, 1, xmin); | |
| } | |
| } | |
| if(windowMaskR || (1.0-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| RGB48WindowMask(decoder, srclineB, width, 1, mask); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, srclineB, width, 1, windowMaskR); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineB, width, 0, xmin); | |
| } | |
| } | |
| if(decoder->channel_swapped_flags & FLAG3D_GHOSTBUST) | |
| { | |
| if(decoder->ghost_bust_left || decoder->ghost_bust_right) | |
| { | |
| GhostBust(decoder, srclineA, srclineB, width, decoder->ghost_bust_left, decoder->ghost_bust_right); | |
| } | |
| } | |
| if(decoder->doVerticalFilter == 0) | |
| { | |
| if(decoder->channel_blend_type==BLEND_STACKED_ANAMORPHIC) //stacked | |
| { | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| outputline2 = output+(y>>1)*pitch; | |
| outputline = output+((y>>1)+(height/2))*pitch; | |
| } | |
| else | |
| { | |
| outputline = output+(y>>1)*pitch; | |
| outputline2 = output+((y>>1)+(height/2))*pitch; | |
| } | |
| } | |
| else //fields | |
| { | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| outputline = output+(y)*pitch; | |
| outputline2 = output+(y+1)*pitch; | |
| } | |
| else | |
| { | |
| outputline2 = output+(y)*pitch; | |
| outputline = output+(y+1)*pitch; | |
| } | |
| } | |
| if(flip_LR/*source_pitch1 < 0*/) // flip Left and Right | |
| { | |
| uint8_t *tmp = outputline2; | |
| outputline2 = outputline; | |
| outputline = tmp; | |
| } | |
| } | |
| else | |
| { | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| memcpy(scratchline2, srclineA, width*skip); | |
| memcpy(srclineA, srclineB, width*skip); | |
| memcpy(srclineB, scratchline2, width*skip); | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ONION: //onion | |
| case BLEND_DIFFERENCE: //difference | |
| case BLEND_SPLITVIEW: //splitView | |
| if(!blank) | |
| { | |
| //dstlineA = source_buffer; | |
| //dstlineA += (source_pitch>>1) * y; | |
| sptr = dstlineA = srclineA; | |
| srclineA = (uint16_t *)bptr1; | |
| srclineB = (uint16_t *)bptr2; | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffset || horizOffsetR || channel_flip || frameTilt) | |
| { | |
| if(!alphachannel) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGB48HoriShift(decoder, srclineB, scratchline2, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGBA64HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| RGBA64HoriShift(decoder, srclineB, scratchline2, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGBA64HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGBA64HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineA, decoder->frame.resolution, skip); | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineB, decoder->frame.resolution, skip); | |
| } | |
| if(blursharpenL != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| if(blursharpenR != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineB, blursharpenR, decoder->frame.resolution, skip); | |
| if(windowMaskL || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| RGB48WindowMask(decoder, srclineA, width, 0, mask); | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 0, windowMaskL); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineA, width, 1, xmin); | |
| } | |
| } | |
| if(windowMaskR || (1.0-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| RGB48WindowMask(decoder, srclineB, width, 1, mask); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, srclineB, width, 1, windowMaskR); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineB, width, 0, xmin); | |
| } | |
| } | |
| x = 0; | |
| if(decoder->channel_blend_type == BLEND_SPLITVIEW) //split view | |
| { | |
| int xsplit = width * (decoder->cfhddata.split_pos_xy & 0xff) / 255; | |
| for(x = xsplit*sskip; x<width*sskip; x++) | |
| { | |
| srclineA[x] = srclineB[x]; | |
| } | |
| } | |
| else if(decoder->channel_blend_type == BLEND_ONION) //onion | |
| { | |
| FastBlendWP13((short *)srclineA, (short *)srclineB, (short *)dstlineA/*output*/, width*skip); | |
| } | |
| else if(decoder->channel_blend_type == BLEND_DIFFERENCE) //difference | |
| { | |
| #if XMMOPT | |
| int width8 = (width*sskip) & 0xfff8; | |
| __m128i mid_epi16; | |
| //int unaligned = ((int)sbase) & 15; | |
| //unaligned += ((int)in_rgb8) & 15; | |
| if(whitepoint == 13) | |
| mid_epi16 = _mm_set1_epi16(0x0fff); | |
| else | |
| mid_epi16 = _mm_set1_epi16(0x1fff); | |
| for(x=0; x<width8; x+=8) | |
| { | |
| __m128i rgb16A = _mm_load_si128((__m128i *)&srclineA[x]); | |
| __m128i rgb16B = _mm_load_si128((__m128i *)&srclineB[x]); | |
| // 0 to 0xffff | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| rgb16A = _mm_subs_epi16(rgb16B, rgb16A); // -3fff to 3fff | |
| } | |
| else | |
| { | |
| rgb16A = _mm_subs_epi16(rgb16A, rgb16B); | |
| } | |
| rgb16A = _mm_adds_epi16(rgb16A, mid_epi16); // -0x1fff to 0x5fff , avg 0x1fff | |
| _mm_store_si128((__m128i *)&dstlineA[x], rgb16A); | |
| } | |
| #endif | |
| for(; x<width*sskip; x++) | |
| { | |
| int val; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| val = (srclineB[x] - srclineA[x]) + 32768; | |
| } | |
| else | |
| { | |
| val = (srclineA[x] - srclineB[x]) + 32768; | |
| } | |
| if(val > 0x7fff) val = 0x7fff; | |
| if(val < 0) val = 0; | |
| dstlineA[x] = val; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_RC: | |
| case BLEND_ANAGLYPH_RC_BW: | |
| case BLEND_ANAGLYPH_AB: | |
| case BLEND_ANAGLYPH_AB_BW: | |
| case BLEND_ANAGLYPH_GM: | |
| case BLEND_ANAGLYPH_GM_BW: | |
| case BLEND_ANAGLYPH_DUBOIS: //Optimized | |
| { | |
| uint16_t *sptr1 = scratchline2; | |
| uint16_t *sptr2 = scratchline3; | |
| dstlineA = (uint16_t *)bptr1; | |
| // dstlineA += (source_pitch>>1) * y; | |
| sptr = dstlineA; | |
| sptr1 = srclineA = (uint16_t *)bptr1; | |
| sptr2 = srclineB = (uint16_t *)bptr2; | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffset || horizOffsetR || channel_flip || frameTilt) | |
| { | |
| if(!alphachannel) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGB48HoriShift(decoder, srclineA, scratchline, width, -horizOffset, flip1); | |
| RGB48HoriShift(decoder, srclineB, scratchline, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| { | |
| RGBA64HoriShift(decoder, scratchline2, scratchline, width, -horizOffset, flip1); | |
| RGBA64HoriShift(decoder, scratchline3, scratchline, width, horizOffsetR, flip2); | |
| } | |
| else | |
| { | |
| RGBA64HoriShiftZoom(decoder, scratchline2, scratchline, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| RGBA64HoriShiftZoom(decoder, scratchline3, scratchline, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineA, decoder->frame.resolution, skip); | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (short *)srclineB, decoder->frame.resolution, skip); | |
| } | |
| if(blursharpenL != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| if(blursharpenR != 0.0) FastSharpeningBlurHinplaceWP13(width, (short *)srclineB, blursharpenR, decoder->frame.resolution, skip); | |
| if(decoder->channel_swapped_flags & FLAG3D_GHOSTBUST) | |
| { | |
| if(decoder->ghost_bust_left || decoder->ghost_bust_right) | |
| { | |
| GhostBust(decoder, srclineA, srclineB, width, decoder->ghost_bust_left, decoder->ghost_bust_right); | |
| } | |
| } | |
| if(windowMaskL || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| RGB48WindowMask(decoder, srclineA, width, 0, mask); | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 0, windowMaskL); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineA, width, 1, xmin); | |
| } | |
| } | |
| if(windowMaskR || (1.0-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| RGB48WindowMask(decoder, srclineB, width, 1, mask); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, srclineB, width, 1, windowMaskR); | |
| if(xmin) | |
| { | |
| RGB48WindowMask(decoder, srclineB, width, 0, xmin); | |
| } | |
| } | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| uint16_t *tmp = srclineA; | |
| srclineA = srclineB; | |
| srclineB = tmp; | |
| } | |
| switch(decoder->channel_blend_type) | |
| { | |
| case BLEND_ANAGLYPH_RC: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr2[0]; | |
| sptr[1] = ptr1[1]; | |
| sptr[2] = ptr1[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr1[0]; | |
| sptr[1] = ptr2[1]; | |
| sptr[2] = ptr2[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_RC_BW: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y2; | |
| sptr[1] = y1; | |
| sptr[2] = y1; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y1; | |
| sptr[1] = y2; | |
| sptr[2] = y2; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_AB: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr2[0]; | |
| sptr[1] = ptr2[1]; | |
| sptr[2] = ptr1[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr1[0]; | |
| sptr[1] = ptr1[1]; | |
| sptr[2] = ptr2[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_AB_BW: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y2; | |
| sptr[1] = y2; | |
| sptr[2] = y1; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y1; | |
| sptr[1] = y1; | |
| sptr[2] = y2; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_GM: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr1[0]; | |
| sptr[1] = ptr2[1]; | |
| sptr[2] = ptr1[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| sptr[0] = ptr2[0]; | |
| sptr[1] = ptr1[1]; | |
| sptr[2] = ptr2[2]; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_GM_BW: | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| if(decoder->channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y1; | |
| sptr[1] = y2; | |
| sptr[2] = y1; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<width; x++) | |
| { | |
| int y1 = (ptr1[0]*5+ptr1[1]*10+ptr1[2])>>4; | |
| int y2 = (ptr2[0]*5+ptr2[1]*10+ptr2[2])>>4; | |
| sptr[0] = y2; | |
| sptr[1] = y1; | |
| sptr[2] = y2; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| } | |
| break; | |
| case BLEND_ANAGLYPH_DUBOIS: //Optimized | |
| { | |
| int16_t *ptr1 = (int16_t *)srclineA; | |
| int16_t *ptr2 = (int16_t *)srclineB; | |
| int r,g,b; | |
| for(x=0; x<width; x++) | |
| { | |
| r =(ptr1[0]*456 + ptr1[1]*500 + ptr1[2]*176 + ptr2[0]*-43 + ptr2[1]*-88 + ptr2[2]*-2 ) / 1000; | |
| g =(ptr1[0]*-40 + ptr1[1]*-38 + ptr1[2]*-16 + ptr2[0]*378 + ptr2[1]*734 + ptr2[2]*-18 ) / 1000; | |
| b =(ptr1[0]*-15 + ptr1[1]*-21 + ptr1[2]*-5 + ptr2[0]*-72 + ptr2[1]*-113+ ptr2[2]*1226) / 1000; | |
| if(r<0) r=0; if(r>0x3fff) r=0x3fff; | |
| if(g<0) g=0; if(g>0x3fff) g=0x3fff; | |
| if(b<0) b=0; if(b>0x3fff) b=0x3fff; | |
| sptr[0] = r; | |
| sptr[1] = g; | |
| sptr[2] = b; | |
| ptr1 += sskip; | |
| ptr2 += sskip; | |
| sptr += sskip; | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| break; | |
| case BLEND_NONE: | |
| default: | |
| if(decoder->channel_decodes == 1) // only one channel | |
| { | |
| if(skip == 8) | |
| { | |
| //the data is already in the correct format | |
| sptr = (unsigned short *)bptr1; | |
| // shift if needed. | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(decoder->channel_current == 0) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGBA64HoriShift(decoder, sptr, scratchline2, width, -horizOffset, flip1); | |
| else | |
| RGBA64HoriShiftZoom(decoder, sptr, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGBA64HoriShift(decoder, sptr, scratchline2, width, horizOffsetR, flip2); | |
| else | |
| RGBA64HoriShiftZoom(decoder, sptr, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| } | |
| else if(skip == 6) | |
| { | |
| //the data is already in the correct format | |
| dstlineA = sptr = (unsigned short *)srclineA; | |
| // shift if needed. | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(decoder->channel_current == 0) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| else | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| } | |
| else | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, horizOffsetR, flip2); | |
| else | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| } | |
| if(vignette != 0.0) | |
| { | |
| FastVignetteInplaceWP13(decoder, width, width, height, y, vig_r1, vig_r2, vig_gain, | |
| (int16_t *)srclineA, decoder->frame.resolution, skip); | |
| } | |
| if(decoder->channel_current == 0) | |
| { | |
| if(blursharpenL != 0.0) | |
| { | |
| FastSharpeningBlurHinplaceWP13(width, (int16_t *)srclineA, blursharpenL, decoder->frame.resolution, skip); | |
| } | |
| } | |
| else | |
| { | |
| if(blursharpenR != 0.0) | |
| { | |
| FastSharpeningBlurHinplaceWP13(width, (int16_t *)srclineA, blursharpenR, decoder->frame.resolution, skip); | |
| } | |
| } | |
| } | |
| if ((windowMaskL && decoder->channel_current == 0) || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| if(decoder->channel_current != 0) mask = xmin; | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 0, windowMaskL); | |
| RGB48WindowMask(decoder, srclineA, width, 0, mask); | |
| } | |
| if ((windowMaskR && decoder->channel_current == 1) || (1.0f-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| if(decoder->channel_current != 1) mask = (1.0f-xmax); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 1, windowMaskR); | |
| RGB48WindowMask(decoder, srclineA, width, 1, mask); | |
| } | |
| } | |
| else | |
| { | |
| outputline2 = output+(y+height)*pitch; | |
| if(zoom != 1.0 || zoomR != 1.0 || horizOffsetR || horizOffset || channel_flip || frameTilt) | |
| { | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGB48HoriShift(decoder, srclineA, scratchline2, width, -horizOffset, flip1); | |
| else | |
| RGB48HoriShiftZoom(decoder, srclineA, scratchline2, width, height, y, -horizOffsetBase, rotOffsetBase, zoom, flip1, frameTilt, 0); | |
| if(zoom == 1.0 && zoomR == 1.0 && frameTilt == 0.0) | |
| RGB48HoriShift(decoder, srclineB, scratchline2, width, horizOffset, flip2); | |
| else | |
| RGB48HoriShiftZoom(decoder, srclineB, scratchline2, width, height, y, horizOffsetBaseR, -rotOffsetBaseR, zoomR, flip2, frameTilt, 1); | |
| } | |
| if(windowMaskL || xmin) | |
| { | |
| float mask = windowMaskL > xmin ? windowMaskL : xmin; | |
| RGB48WindowMask(decoder, srclineA, width, 0, mask); | |
| if(windowMaskL < 0) | |
| RGB48WindowMask(decoder, srclineA, width, 0, windowMaskL); | |
| } | |
| if(windowMaskR || (1.0-xmax)) | |
| { | |
| float mask = windowMaskR > (1.0f-xmax) ? windowMaskR : (1.0f-xmax); | |
| RGB48WindowMask(decoder, srclineB, width, 1, mask); | |
| if(windowMaskR < 0) | |
| RGB48WindowMask(decoder, srclineB, width, 1, windowMaskR); | |
| } | |
| if(decoder->channel_swapped_flags & FLAG3D_GHOSTBUST) | |
| { | |
| if(decoder->ghost_bust_left || decoder->ghost_bust_right) | |
| { | |
| GhostBust(decoder, srclineA, srclineB, width, decoder->ghost_bust_left, decoder->ghost_bust_right); | |
| } | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| if(!formatdone) | |
| { | |
| int flags = ACTIVEMETADATA_PRESATURATED; | |
| int whitebitdepth = 16; | |
| if(decoder->StereoBufferFormat == DECODED_FORMAT_WP13 || decoder->StereoBufferFormat == DECODED_FORMAT_W13A) | |
| { | |
| flags = 0; | |
| whitebitdepth = 13; | |
| } | |
| if(outputline2) | |
| { | |
| // if(decoder->cfhddata.ComputeFlags&2 && (0 == (y&3)) && decoder->tools) | |
| // HistogramLine(decoder, srclineA, width, DECODED_FORMAT_RG48, whitebitdepth); | |
| if(decoder->doVerticalFilter == 0) // No sharp stage so output now | |
| { | |
| if(alphachannel) | |
| Convert4444LinesToOutput(decoder, width, 1, y, srclineA, | |
| outputline, pitch, decoder->frame.format, whitebitdepth, flags); | |
| else | |
| ConvertLinesToOutput(decoder, width, 1, y, srclineA, | |
| outputline, pitch, decoder->frame.format, whitebitdepth, flags); | |
| //if(decoder->cfhddata.ComputeFlags&2 && (0 == (y&3)) && decoder->tools) | |
| // HistogramLine(decoder, dstlineA, width, DECODED_FORMAT_RG48, whitebitdepth); | |
| if(alphachannel) | |
| Convert4444LinesToOutput(decoder, width, 1, y, srclineB, | |
| outputline2, pitch, decoder->frame.format, whitebitdepth, flags); | |
| else | |
| ConvertLinesToOutput(decoder, width, 1, y, srclineB, | |
| outputline2, pitch, decoder->frame.format, whitebitdepth, flags); | |
| } | |
| } | |
| else | |
| { | |
| //if(decoder->cfhddata.ComputeFlags&2 && (0 == (y&3)) && decoder->tools) | |
| //{ | |
| // if(alphachannel) | |
| // HistogramLine(decoder, srclineA, width, DECODED_FORMAT_RG64, whitebitdepth); | |
| // else | |
| // HistogramLine(decoder, srclineA, width, DECODED_FORMAT_RG48, whitebitdepth); | |
| //} | |
| if(decoder->doVerticalFilter == 0) // No sharp stage so output now | |
| { | |
| if(alphachannel) | |
| Convert4444LinesToOutput(decoder, width, 1, y, srclineA, | |
| outputline, pitch, decoder->frame.format, whitebitdepth, flags); | |
| else | |
| ConvertLinesToOutput(decoder, width, 1, y, srclineA, | |
| outputline, pitch, decoder->frame.format, whitebitdepth, flags); | |
| } | |
| } | |
| } | |
| } | |
/*
 * SharpenLine — apply the optional vertical sharpen/blur pass to one decoded
 * scan line held in the intermediate stereo buffer, then convert that line to
 * the final output pixel format.
 *
 * Parameters:
 *   decoder        - decoder state; supplies frame geometry, StereoBufferFormat,
 *                    channel_blend_type, per-channel sharpen amounts, and flags.
 *   buffer         - per-thread scratch memory; this thread's slice is selected
 *                    with thread_index (see `scratch` below).
 *   bufferremain   - unused in this routine (kept for the caller's signature).
 *   output         - base pointer of the destination frame.
 *   pitch          - destination row pitch in bytes.
 *   local_output   - base of the intermediate (pre-sharpen) line store.
 *   local_pitch    - row pitch of local_output in bytes; 0 disables sharpening.
 *   channel_offset - byte offset to the second eye's data within an interleaved
 *                    row (used only for BLEND_LINE_INTERLEAVED).
 *   y              - scan line index to process.
 *   thread_index   - index of the worker thread running this call.
 *
 * Only WP13/W13A stereo buffer formats actually get the vertical filter; every
 * other format falls straight through to the format conversion at the bottom.
 */
void SharpenLine(DECODER *decoder, uint8_t *buffer, int bufferremain, uint8_t *output, int pitch, uint8_t *local_output, int local_pitch, int channel_offset, int y, int thread_index)
{
	uint16_t *sbase;//*sbase2 = NULL;
	int width = decoder->frame.width;
	int height = decoder->frame.height;
	int skip = 3;	// bytes per pixel of the stereo buffer format; refined in the switch below
	//int flip1=0;//flip2=0;
	int channel_flip = decoder->cfhddata.channel_flip;
	//int local_pitch1 = local_pitch;
	//int local_pitch2 = local_pitch;
	uint8_t *outputline = output+y*pitch;	// destination row for this scan line
	//uint8_t *outputline2 = NULL;
	short *scratch;	// per-thread line buffer receiving the filtered output
	//int formatdone = 0;
	//float xmin = decoder->cfhddata.channel[0].FrameMask.topLftX;
	//float xmax = decoder->cfhddata.channel[0].FrameMask.topRgtX;
	//float ymin = decoder->cfhddata.channel[0].FrameMask.topLftY;
	//float ymax = decoder->cfhddata.channel[0].FrameMask.botLftY;
	int alphachannel = 0;	// nonzero when the buffer format carries alpha (4 components)
	float blursharpen = 0;	// vertical sharpen (+) / blur (-) amount for the current eye
	int line_max = decoder->frame.height;	// number of output lines, for vertical flip math
	int yy = y;
	// Pick the per-eye sharpen amount. NOTE(review): channel[1] is used for the
	// left eye and channel[2] for the right; confirm against the metadata layout.
	if(decoder->channel_current == 0)
		blursharpen = decoder->cfhddata.channel[1].user_blur_sharpen; // TODO LEFT and RIGHT separate vertical sharpen
	else
		blursharpen = decoder->cfhddata.channel[2].user_blur_sharpen; // TODO LEFT and RIGHT separate vertical sharpen
	// Sharpening is disabled when color processing is off or the decode is a
	// reduced resolution where the filter taps would not be meaningful.
	if(!(decoder->cfhddata.process_path_flags & PROCESSING_COLORMATRIX)||
		decoder->frame.resolution == DECODED_RESOLUTION_QUARTER ||
		decoder->frame.resolution == DECODED_RESOLUTION_LOWPASS_ONLY ||
		decoder->frame.resolution == DECODED_RESOLUTION_QUARTER_NODEBAYER_SCALED)
	{
		blursharpen = 0.0;
	}
	// Half-res stereo mixes produce twice as many output lines as the frame height.
	if(decoder->channel_mix_half_res == 1)
		line_max *= 2;
	if(!(decoder->cfhddata.process_path_flags & PROCESSING_IMAGEFLIPS))
	{
		channel_flip = 0;
	}
	if(decoder->sharpen_flip) //SharpenLine
	{
		//if(!(decoder->channel_blend_type == BLEND_NONE && decoder->channel_current == 1)) // right channel only (stored in baseptr1)
		{
			// Vertical flip: write this source line to the mirrored output row.
			yy = (line_max - 1 - y);
			outputline = output+yy*pitch;
		}
	}
	if( decoder->StereoBufferFormat == DECODED_FORMAT_RG64 ||
		decoder->StereoBufferFormat == DECODED_FORMAT_W13A ||
		decoder->StereoBufferFormat == DECODED_FORMAT_RGB32)
		alphachannel = 1;
	if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL)
	{
		width *= 2;
	}
	// Default source line: row y of the intermediate store (pitch is in bytes,
	// sbase is uint16_t, hence the >>1).
	sbase = (uint16_t *)local_output;
	sbase += (local_pitch>>1) * y;
	// Bytes per pixel for each supported stereo buffer format.
	switch(decoder->StereoBufferFormat)
	{
	case DECODED_FORMAT_RG64:
	case DECODED_FORMAT_W13A:
		skip = 8;
		break;
	case DECODED_FORMAT_WP13:
		skip = 6;
		break;
	case DECODED_FORMAT_RG48:
		skip = 6;
		break;
	case DECODED_FORMAT_RGB32:
		skip = 4;
		break;
	case DECODED_FORMAT_RGB24:
		skip = 3;
		break;
	case DECODED_FORMAT_YUYV:
		skip = 2;
		break;
	}
	// Each worker thread gets its own width*skip byte slice of the scratch buffer.
	scratch = (short*)(buffer + width * skip * thread_index);
	{
		int flags = ACTIVEMETADATA_PRESATURATED;
		int whitebitdepth = 16;
		if((decoder->StereoBufferFormat == DECODED_FORMAT_WP13 || decoder->StereoBufferFormat == DECODED_FORMAT_W13A))
		{
			int use_pitch = local_pitch;
			int edgeclose = 0;	// set near the top/bottom edges so the filter can clamp
			flags = 0;
			whitebitdepth = 13;	// WP13/W13A store 13-bit white point data
			if(blursharpen != 0.0 && local_pitch != 0)
			{
				// Five-tap vertical filter: gather pointers to the two rows above
				// (Aptr, Bptr), the current row (Cptr), and the two rows below
				// (Dptr, Eptr), clamping at the frame edges. The row spacing
				// depends on how the two eyes are interleaved in the buffer.
				short *Aptr,*Bptr,*Cptr,*Dptr,*Eptr;
				switch(decoder->channel_blend_type)
				{
				case BLEND_STACKED_ANAMORPHIC:
					// Eyes stacked vertically: rows of one eye are every other
					// buffer row, so taps step by two rows (local_pitch * 2).
					sbase = (uint16_t *)local_output;
					sbase += (local_pitch>>1) * y * 2;
					if(y<=4) edgeclose = 1;
					if(y>=2) Aptr = (short *)sbase - (local_pitch>>1) * 4; else Aptr = (short *)sbase;
					if(y>=1) Bptr = (short *)sbase - (local_pitch>>1) * 2; else Bptr = (short *)sbase;
					Cptr = (short *)sbase;
					if(y<height-1) Dptr = (short *)sbase + (local_pitch>>1) * 2; else Dptr = (short *)sbase;
					if(y<height-2) Eptr = (short *)sbase + (local_pitch>>1) * 4; else Eptr = (short *)sbase;
					if(y>=height-4) edgeclose = 1;
					use_pitch = local_pitch * 2;
					break;
				case BLEND_LINE_INTERLEAVED:
					// Odd rows are the second eye: step back to the even row and
					// select the correct eye via channel_offset on even rows.
					sbase = (uint16_t *)local_output;
					if(y & 1)
					{
						y--;
						sbase += (local_pitch>>1) * y;
					}
					else
					{
						sbase += (local_pitch>>1) * y;
						sbase += channel_offset>>1;
					}
					if(y<=8) edgeclose = 1;
					if(y>=4) Aptr = (short *)sbase - (local_pitch>>1) * 4; else Aptr = (short *)sbase;
					if(y>=2) Bptr = (short *)sbase - (local_pitch>>1) * 2; else Bptr = (short *)sbase;
					Cptr = (short *)sbase;
					if(y<height-2) Dptr = (short *)sbase + (local_pitch>>1) * 2; else Dptr = (short *)sbase;
					if(y<height-4) Eptr = (short *)sbase + (local_pitch>>1) * 4; else Eptr = (short *)sbase;
					if(y>=height-8) edgeclose = 1;
					use_pitch = local_pitch * 2;
					break;
				default:
					// Planar/default layout: consecutive buffer rows are
					// consecutive image rows for this eye.
					if(y<=4) edgeclose = 1;
					if(y>=2) Aptr = (short *)sbase - (local_pitch>>1) * 2; else Aptr = (short *)sbase;
					if(y>=1) Bptr = (short *)sbase - (local_pitch>>1) * 1; else Bptr = (short *)sbase;
					Cptr = (short *)sbase;
					if(y<height-1) Dptr = (short *)sbase + (local_pitch>>1) * 1; else Dptr = (short *)sbase;
					if(y<height-2) Eptr = (short *)sbase + (local_pitch>>1) * 2; else Eptr = (short *)sbase;
					if(y>=height-4) edgeclose = 1;
					use_pitch = local_pitch;
					break;
				}
				// skip == 8 means the 4-component W13A path; otherwise 3-component WP13.
				if(skip == 8)
				{
					FastSharpeningBlurVW13A(Aptr, Bptr, Cptr, Dptr, Eptr, use_pitch, edgeclose,
						scratch, width, blursharpen,
						decoder->frame.resolution,
						decoder->channel_blend_type);
				}
				else
				{
					FastSharpeningBlurVWP13(Aptr, Bptr, Cptr, Dptr, Eptr, use_pitch, edgeclose,
						scratch, width, blursharpen,
						decoder->frame.resolution,
						decoder->channel_blend_type);
				}
				// The filtered line now lives in the scratch buffer; convert from there.
				sbase = (uint16_t *)scratch;
			}
		}
		// Final stage: convert the (possibly filtered) line to the output format.
		if(alphachannel)
			Convert4444LinesToOutput(decoder, width, 1, y, sbase,
				outputline, pitch, decoder->frame.format, whitebitdepth, flags);
		else
			ConvertLinesToOutput(decoder, width, 1, y, sbase,
				outputline, pitch, decoder->frame.format, whitebitdepth, flags);
	}
}
| #if _GRAPHICS | |
/*
 * PaintFrame — render burn-in overlays and analysis tools (histogram, waveform,
 * vectorscope, grid, safe markers) on top of a fully decoded frame.
 *
 * Parameters:
 *   decoder       - decoder state; BurninFlags/ComputeFlags select what to draw.
 *   output        - base pointer of the decoded frame to draw onto.
 *   pitch         - row pitch of the frame in bytes.
 *   output_format - pixel format of the frame.
 *
 * BurninFlags bit 1 (value 2) enables the analysis tools; bit 0 (value 1)
 * enables metadata overlays. ComputeFlags & ~1 means "any compute flag other
 * than bit 0 is set", i.e. at least one tool needs the histogram data.
 * Returns early when there is nothing to draw or allocation fails.
 */
void PaintFrame(DECODER *decoder, uint8_t *output, int pitch, int output_format)
{
	int x,y,v,width, height;
	int maxR=0,maxG=0,maxB=0;	// only used by the disabled (#if 0) legacy path below
	width = decoder->frame.width;
	height = decoder->frame.height;
	if(decoder->cfhddata.BurninFlags == 0)
		return;
	// Lazily allocate the tools workspace the first time any tool is requested.
	if(decoder->cfhddata.BurninFlags & 2 && decoder->cfhddata.ComputeFlags & ~1) // tools
	{
		if(decoder->tools == NULL)
		{
#if _ALLOCATOR
			decoder->tools = (ToolsHandle *)Alloc(decoder->allocator, sizeof(ToolsHandle));
#else
			decoder->tools = (ToolsHandle *)MEMORY_ALLOC(sizeof(ToolsHandle));
#endif
			if(decoder->tools)
			{
				memset(decoder->tools, 0, sizeof(ToolsHandle));
			}
			else
			{
				// Out of memory: silently skip all burn-ins for this frame.
				return;
			}
		}
	}
	decoder->frame.output_format = output_format;
#if _THREADED && 1
	// Gather histogram data across worker threads, then derive display scaling.
	if(decoder->cfhddata.BurninFlags & 2 && decoder->cfhddata.ComputeFlags & ~1 && decoder->tools) // histogram/scopes/waveform
	{
		WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;
		int workunits;
#if _DELAY_THREAD_START
		if(decoder->tools->histogram == 0 && decoder->worker_thread.pool.thread_count == 0)
		{
			CreateLock(&decoder->worker_thread.lock);
			// Initialize the pool of transform worker threads
			ThreadPoolCreate(&decoder->worker_thread.pool,
				decoder->thread_cntrl.capabilities >> 16/*cpus*/,
				WorkerThreadProc,
				decoder);
		}
#endif
		{
			int avgR=0,avgG=0,avgB=0;
			// Post a message to the mailbox
			mailbox->output = output;
			// Subsample tall frames so the histogram pass stays cheap.
			if(height >= 1080)
			{
				mailbox->pitch = pitch*4; // only read every 4th scan line
				workunits = height/4; // only read every 4th scan line
			}
			else if(height >= 540)
			{
				mailbox->pitch = pitch*2; // only read every 2th scan line
				workunits = height/2; // only read every 2th scan line
			}
			else
			{
				mailbox->pitch = pitch; // read every scan line
				workunits = height; // read every scan line
			}
			if(decoder->tools->histogram == 0)
			{
				mailbox->jobType = JOB_TYPE_HISTOGRAM; // histogram
				// Set the work count to the number of rows to process
				ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);
				// Start the transform worker threads
				ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);
				// Wait for all of the worker threads to finish
				ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
			}
			// Average bucket occupancy per channel; 3x the average is used as
			// the normalization ceiling instead of the true per-bucket maximum.
			for(x=0;x<256;x++)
			{
				avgR += decoder->tools->histR[x];
				avgG += decoder->tools->histG[x];
				avgB += decoder->tools->histB[x];
				//if(maxR < decoder->histR[x]) maxR = decoder->histR[x];
				//if(maxG < decoder->histG[x]) maxG = decoder->histG[x];
				//if(maxB < decoder->histB[x]) maxB = decoder->histB[x];
			}
			avgR /= 256;
			avgG /= 256;
			avgB /= 256;
			//maxR++;
			//maxG++;
			//maxB++;
			decoder->tools->maxR = avgR*3;//maxR;
			decoder->tools->maxG = avgG*3;//maxG;
			decoder->tools->maxB = avgB*3;//maxB;
		}
	}
#endif
	// Draw phase: overlays, safe markers, and any enabled analysis tools.
	if(decoder->cfhddata.BurninFlags && DrawOpen(decoder))
	{
		if(decoder->cfhddata.BurninFlags & 3) // overlays / tools
		{
#if _THREADED
			//DrawInit(decoder);
			//DrawStartThreaded(decoder);
			// If a draw thread pool is running, wait for its pre-started work;
			// otherwise draw the metadata objects synchronously here.
			if(decoder->draw_thread.pool.thread_count > 0)
			{
				DrawWaitThreaded(decoder);
			}
			else
#endif
			{
				DrawInit(decoder);
				DrawMetadataObjects(decoder);
			}
		}
		else
		{
			DrawInit(decoder);
		}
		if(decoder->drawSafeMarkers)
			DrawSafeMarkers(decoder);
		if(decoder->cfhddata.BurninFlags & 2) // tools
		{
			if(decoder->tools)
			{
				// ComputeFlags bits select the individual tools (2=histogram,
				// 4=waveform, 8=vectorscope, 16=grid); each needs histogram data.
				if(decoder->tools->histogram && decoder->cfhddata.ComputeFlags & 16)
					DrawGrid(decoder, 0/*decoder->MDPcurrent.parallax*/);
				if(decoder->tools->histogram && decoder->cfhddata.ComputeFlags & 2)
					DrawHistogram(decoder, 0/*decoder->MDPcurrent.parallax*/);
				if(decoder->tools->histogram && decoder->cfhddata.ComputeFlags & 4)
					DrawWaveform(decoder, 0/*decoder->MDPcurrent.parallax*/);
				if(decoder->tools->histogram && decoder->cfhddata.ComputeFlags & 8)
					DrawVectorscope(decoder, 0/*decoder->MDPcurrent.parallax*/);
			}
		}
		// Composite the accumulated draw buffer onto the output frame.
		DrawScreen(decoder, output, pitch, output_format);
	}
#if 0
	// Legacy single-threaded histogram path, kept for reference only.
#if _THREADED && 1
	if(decoder->cfhddata.BurninFlags & 2 && decoder->cfhddata.ComputeFlags & 2 && decoder->tools) // histogram
	{
		WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;
		int workunits;
		int targetW, targetH;
		if(width < 256 || height < 256)
			return;
		targetW = width / 4;
		targetH = height / 8;
		mailbox->output = output;
		mailbox->pitch = pitch;
		workunits = targetW;
		mailbox->jobType = JOB_TYPE_BURNINS; // burnin
		// Set the work count to the number of rows to process
		ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);
		// Start the transform worker threads
		ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);
		// Wait for all of the worker threads to finish
		ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
	}
#else
	if(decoder->histogram == 0)
	{
		for(y=0; y<height; y+=4)
		{
			uint8_t *bptr = output;
			bptr += pitch * y;
			HistogramLine(decoder, (unsigned short *)bptr, width, output_format);
			if(decoder->histogram == 0)
				return; // don't know how to create Histogram for that format
		}
	}
	for(x=1;x<255;x++)
	{
		if(maxR < decoder->histR[x]) maxR = decoder->histR[x];
		if(maxG < decoder->histG[x]) maxG = decoder->histG[x];
		if(maxB < decoder->histB[x]) maxB = decoder->histB[x];
	}
	maxR++;
	maxG++;
	maxB++;
	decoder->maxR = maxR;
	decoder->maxG = maxG;
	decoder->maxB = maxB;
	for(x=0; x<targetW; x++)
	{
		HistogramRender(decoder, output, pitch, output_format, x, targetW, targetH);
	}
#endif
#endif
	// Reset the tools workspace so the next frame starts with clean histograms.
	if(decoder->tools)
		memset(decoder->tools, 0, sizeof(ToolsHandle));
}
| #endif | |
| extern int geomesh_alloc_cache(void *gm); | |
| #define DEG2RAD(d) (PI*(d)/180.0f) | |
| #define RAD2DEG(r) (180.0f*(r)/PI) | |
/*
 * approx_equal — compare two pixel dimensions for approximate equality.
 *
 * Both values are coarsened by a right shift whose size scales with y
 * (6 bits above 1080, 5 bits above 540, 4 bits otherwise), then the
 * coarse values may differ by at most one step. This tolerates the small
 * rounding differences that occur in real-world frame sizes.
 *
 * Returns true when x and y match within that tolerance.
 */
bool approx_equal(int x, int y)
{
	int shift;
	if (y > 1080)
		shift = 6;
	else if (y > 540)
		shift = 5;
	else
		shift = 4;
	{
		int cx = x >> shift;
		int cy = y >> shift;
		return cx == cy || cx + 1 == cy || cx == cy + 1;
	}
}
/*
 * approx_equal_float — return true when y is within 1% of x.
 *
 * Fix: the original open-interval test (x*0.99 < y && y < x*1.01) inverts
 * when x is negative (x*0.99 > x*1.01 for x < 0), so even exactly equal
 * negative values compared as not-equal. The magnitude form below is exactly
 * equivalent for positive x (|y - x| < 0.01*x) — the aspect-ratio comparisons
 * in WarpFrame all use positive values — and is additionally correct for
 * negative inputs. x == 0 still never matches (tolerance collapses to zero),
 * matching the original behavior.
 */
bool approx_equal_float(float x, float y)
{
	return fabsf(x - y) < 0.01f * fabsf(x);
}
| #if WARPSTUFF | |
| void WarpFrame(DECODER *decoder, uint8_t *output, int pitch, int output_format) | |
| { | |
| int width, height; | |
| //int maxR = 0, maxG = 0, maxB = 0; | |
| int status = WARPLIB_SUCCESS; | |
| CFHDDATA *cfhddata = &decoder->cfhddata; | |
| int backgroundfill = cfhddata->lensFill; | |
| float sensorcrop = 1.0; | |
| float phi, theta, rho; | |
| int srcLens = HERO4; | |
| if (!cfhddata->doMesh) return; | |
| if (decoder->lastLensOffsetX != cfhddata->LensOffsetX || | |
| decoder->lastLensOffsetY != cfhddata->LensOffsetY || | |
| decoder->lastLensOffsetZ != cfhddata->LensOffsetZ || | |
| decoder->lastLensOffsetR != cfhddata->LensOffsetR || | |
| decoder->lastLensZoom != cfhddata->LensZoom || | |
| decoder->lastLensFishFOV != cfhddata->LensFishFOV || | |
| decoder->lastLensGoPro != cfhddata->lensGoPro || | |
| decoder->lastLensSphere != cfhddata->lensSphere || | |
| decoder->lastLensFill != cfhddata->lensFill || | |
| decoder->lastLensStyleSel != cfhddata->lensStyleSel || | |
| memcmp(decoder->lastLensCustomSRC, cfhddata->lensCustomSRC, sizeof(cfhddata->lensCustomSRC)) || | |
| memcmp(decoder->lastLensCustomDST, cfhddata->lensCustomDST, sizeof(cfhddata->lensCustomDST)) ) | |
| { | |
| if (decoder->mesh) | |
| geomesh_destroy(decoder->mesh); | |
| width = decoder->frame.width; | |
| height = decoder->frame.height; | |
| if (approx_equal(width, height * 2)) // approx. 2:1 | |
| { | |
| float outputaspect = 16.0f/9.0f; | |
| srcLens = EQUIRECT; | |
| sensorcrop = 1.00623f; // Fixes the slight calculation error difference between 16x9 with a 4x3, and 16x9 within a 2x1 image. | |
| if (cfhddata->lensCustomSRC[1]) | |
| { | |
| outputaspect = cfhddata->lensCustomSRC[0] / cfhddata->lensCustomSRC[1]; | |
| if (outputaspect >= 1.0f && outputaspect <= 3.0f) | |
| { | |
| //float sourceratio = (float)width / (float)height; | |
| if (approx_equal_float(outputaspect, 4.0f / 3.0f)) | |
| sensorcrop = sqrtf((float)(width*width + height*height)) / sqrtf((float)((width * 2 / 3)*(width * 2 / 3) + (height*height))); | |
| if (approx_equal_float(outputaspect, 16.0f / 9.0f)) // 0.88; | |
| sensorcrop = 1.00623f; // Fixes the slight calculation error difference between 16x9 with a 4x3, and 16x9 within a 2x1 image. | |
| } | |
| } | |
| if (width >= 2496) | |
| decoder->mesh = geomesh_create(199, 99); | |
| else if (width >= 1272) | |
| decoder->mesh = geomesh_create(99, 49); | |
| else | |
| decoder->mesh = geomesh_create(49, 25); | |
| phi = cfhddata->LensOffsetX * DEG2RAD(720.0f); // +-180deg HFOV for 2:1 | |
| theta = cfhddata->LensOffsetY * DEG2RAD(720.0f); // +-180deg VFOV for 2:1 | |
| rho = (cfhddata->LensOffsetZ - 1.0f)*4.0f* DEG2RAD(360.0f); // +-360deg | |
| } | |
| else if (approx_equal(width * 3, height * 4)) // approx. 4:3 | |
| { | |
| srcLens = HERO4; | |
| sensorcrop = 1.0; | |
| if (width > 2880) // UHD | |
| decoder->mesh = geomesh_create(159, 119); | |
| else if (width >= 1920) //HD/2.7K | |
| decoder->mesh = geomesh_create(79, 59); | |
| else | |
| decoder->mesh = geomesh_create(39, 29); | |
| phi = cfhddata->LensOffsetX * DEG2RAD(120.0f); // +-60deg HFOV for 16:9 | |
| theta = cfhddata->LensOffsetY * DEG2RAD(98.0f); // +-49deg VFOV for 16:9 | |
| rho = (cfhddata->LensOffsetZ - 1.0f)*4.0f* DEG2RAD(360.0f); // +-360deg | |
| } | |
| else //if(approx_equal(width*9,height*16)) // approx. 16:9 | |
| { | |
| srcLens = HERO4; | |
| sensorcrop = sqrtf(1920 * 1920 + 1080 * 1080) / sqrtf(2000 * 2000 + 1500 * 1500); // 3840x2160 from 4000x3000 | |
| if (width > 2880) // UHD | |
| decoder->mesh = geomesh_create(159, 119); | |
| else if (width >= 1920) //HD/2.7K | |
| decoder->mesh = geomesh_create(79, 59); | |
| else | |
| decoder->mesh = geomesh_create(39, 29); | |
| phi = cfhddata->LensOffsetX * DEG2RAD(120.0f); // +-60.1deg HFOV for 16:9 | |
| theta = cfhddata->LensOffsetY * DEG2RAD(70.0f); // +-34.75deg VFOV for 16:9 | |
| rho = (cfhddata->LensOffsetZ - 1.0f)*4.0f* DEG2RAD(360.0f); // +-360deg | |
| } | |
| if ((output_format & 0x7fffffff) == COLOR_FORMAT_YUYV) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_YUY2, width, height, pitch, WARPLIB_FORMAT_YUY2, backgroundfill); | |
| else if ((output_format & 0x7fffffff) == COLOR_FORMAT_RGB32) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_32BGRA, width, height, pitch, WARPLIB_FORMAT_32BGRA, backgroundfill); | |
| else if ((output_format & 0x7fffffff) == COLOR_FORMAT_W13A) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_W13A, width, height, pitch, WARPLIB_FORMAT_W13A, backgroundfill); | |
| else if ((output_format & 0x7fffffff) == COLOR_FORMAT_WP13) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_WP13, width, height, pitch, WARPLIB_FORMAT_WP13, backgroundfill); | |
| else if ((output_format & 0x7fffffff) == COLOR_FORMAT_RG48) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_RG48, width, height, pitch, WARPLIB_FORMAT_RG48, backgroundfill); | |
| else if ((output_format & 0x7fffffff) == COLOR_FORMAT_BGRA64) | |
| status |= geomesh_init(decoder->mesh, width, height, pitch, WARPLIB_FORMAT_64ARGB, width, height, pitch, WARPLIB_FORMAT_64ARGB, backgroundfill); | |
| else | |
| assert(0); | |
| if (cfhddata->lensSphere == 1) | |
| { | |
| if (cfhddata->lensGoPro != 2) // not outputting EQUIRECT | |
| { | |
| if (cfhddata->LensOffsetR != 0.0) | |
| { | |
| //float angle = 360.0 * asinf(cfhddata->LensOffsetR * 1.7777777777) / (2.0 * 3.14159); | |
| float angle = 360.0f * cfhddata->LensOffsetR * cfhddata->LensOffsetR * 2.1f;//asinf(cfhddata->LensOffsetR * 1.7777777777) / (2.0 * 3.14159); | |
| if (cfhddata->LensOffsetR < 0.0) angle = -angle; | |
| geomesh_transform_rotate(decoder->mesh, angle); | |
| } | |
| if (cfhddata->LensZoom != 1.0) | |
| geomesh_transform_scale(decoder->mesh, cfhddata->LensZoom, cfhddata->LensZoom); | |
| if (cfhddata->LensFishFOV != 0.0) // DeFish | |
| { | |
| float fov = cfhddata->LensFishFOV;// *180.0; | |
| if (fov > 89.9f) fov = 89.9f; | |
| if (fov < -89.9f) fov = -89.9f; | |
| if (fov) | |
| status |= geomesh_transform_defish(decoder->mesh, fov); | |
| } | |
| } | |
| switch (cfhddata->lensGoPro) | |
| { | |
| case 0: geomesh_transform_repoint_src_to_dst(decoder->mesh, sensorcrop, phi, theta, rho, srcLens, RECTILINEAR); break; | |
| case 1: geomesh_transform_repoint_src_to_dst(decoder->mesh, sensorcrop, phi, theta, rho, srcLens, HERO4); break; | |
| case 2: geomesh_transform_repoint_src_to_dst(decoder->mesh, sensorcrop, phi, theta, rho, srcLens, EQUIRECT); break; | |
| case 4: | |
| geomesh_set_custom_lens(decoder->mesh, cfhddata->lensCustomSRC, cfhddata->lensCustomDST, sizeof(cfhddata->lensCustomDST)); | |
| if (srcLens == EQUIRECT) geomesh_transform_repoint_src_to_dst(decoder->mesh, sensorcrop, phi, theta, rho, EQUIRECT, CUSTOM_LENS); | |
| else geomesh_transform_repoint_src_to_dst(decoder->mesh, sensorcrop, phi, theta, rho, CUSTOM_LENS, CUSTOM_LENS); | |
| break; | |
| } | |
| } | |
| else // old boring geometry | |
| { | |
| if (cfhddata->LensZoom != 1.0) | |
| geomesh_transform_scale(decoder->mesh, cfhddata->LensZoom, cfhddata->LensZoom); | |
| // basic orthographic moves | |
| if (cfhddata->LensOffsetX != 0.0 || cfhddata->LensOffsetY != 0.0) | |
| geomesh_transform_pan(decoder->mesh, cfhddata->LensOffsetX*(float)width, -cfhddata->LensOffsetY*(float)height); | |
| if (cfhddata->LensOffsetR != 0.0) | |
| { | |
| float angle = 360.0f * asinf(cfhddata->LensOffsetR * 1.7777777777f) / (2.0f * 3.14159f); | |
| geomesh_transform_rotate(decoder->mesh, angle); | |
| } | |
| if (cfhddata->lensGoPro == 0) //Rectilear | |
| status |= geomesh_transform_gopro_to_rectilinear(decoder->mesh, sensorcrop); | |
| //status |= geomesh_fisheye_gopro_adjustmesh(mesh, &correction_mode, WARPLIB_ALGORITHM_PRESERVE_EVERYTHING,//WARPLIB_ALGORITHM_BEST_FIT, | |
| // width, height, product, model, lens_type, fov, (int)decoder->frame.resolution); | |
| } | |
| geomesh_alloc_cache(decoder->mesh); // required for JOB_TYPE_WARP_CACHE | |
| if (status == WARPLIB_SUCCESS) | |
| { | |
| if (decoder->lens_correct_buffer == NULL) | |
| { | |
| #if _ALLOCATOR | |
| decoder->lens_correct_buffer = (int *)Alloc(decoder->allocator, pitch * height); | |
| #else | |
| decoder->lens_correct_buffer = (int *)MEMORY_ALLOC(pitch * height); | |
| #endif | |
| } | |
| } | |
| else | |
| { | |
| return; | |
| } | |
| /* need resources? | |
| { | |
| if(decoder->tools == NULL) | |
| { | |
| #if _ALLOCATOR | |
| decoder->tools = (ToolsHandle *)Alloc(decoder->allocator, sizeof(ToolsHandle)); | |
| #else | |
| decoder->tools = (ToolsHandle *)MEMORY_ALLOC(sizeof(ToolsHandle)); | |
| #endif | |
| if(decoder->tools) | |
| { | |
| memset(decoder->tools, 0, sizeof(ToolsHandle)); | |
| } | |
| else | |
| { | |
| return; | |
| } | |
| } | |
| } | |
| */ | |
| #if _THREADED && 1 | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| int workunits = decoder->frame.height; | |
| #if _DELAY_THREAD_START | |
| if (decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| { | |
| // Post a message to the mailbox | |
| mailbox->data = decoder->mesh; | |
| mailbox->output = output; | |
| mailbox->local_output = (uint8_t *)decoder->lens_correct_buffer; | |
| mailbox->line_max = decoder->frame.height; | |
| mailbox->chunk_size = 16; | |
| workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size; | |
| mailbox->jobType = JOB_TYPE_WARP_CACHE; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| } | |
| #endif | |
| //decoder->frame.output_format = output_format; | |
| decoder->lastLensOffsetX = cfhddata->LensOffsetX; | |
| decoder->lastLensOffsetY = cfhddata->LensOffsetY; | |
| decoder->lastLensOffsetZ = cfhddata->LensOffsetZ; | |
| decoder->lastLensOffsetR = cfhddata->LensOffsetR; | |
| decoder->lastLensZoom = cfhddata->LensZoom; | |
| decoder->lastLensFishFOV = cfhddata->LensFishFOV; | |
| decoder->lastLensGoPro = cfhddata->lensGoPro; | |
| decoder->lastLensSphere = cfhddata->lensSphere; | |
| decoder->lastLensFill = cfhddata->lensFill; | |
| decoder->lastLensStyleSel = cfhddata->lensStyleSel; | |
| memcpy(decoder->lastLensCustomSRC, cfhddata->lensCustomSRC, sizeof(cfhddata->lensCustomSRC)); | |
| memcpy(decoder->lastLensCustomDST, cfhddata->lensCustomDST, sizeof(cfhddata->lensCustomDST)); | |
| } | |
| #if _THREADED && 1 | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| int workunits = decoder->frame.height; | |
| mailbox->data = decoder->mesh; | |
| mailbox->output = output; | |
| mailbox->local_output = (uint8_t *)decoder->lens_correct_buffer; | |
| mailbox->line_max = decoder->frame.height; | |
| mailbox->chunk_size = 16; | |
| workunits = (mailbox->line_max + mailbox->chunk_size-1)/mailbox->chunk_size; | |
| mailbox->jobType = JOB_TYPE_WARP; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| if(backgroundfill) // may need to blur the filled in areas | |
| { | |
| mailbox->data = decoder->mesh; | |
| mailbox->output = (uint8_t *)decoder->lens_correct_buffer; | |
| mailbox->local_output = (uint8_t *)decoder->lens_correct_buffer; | |
| mailbox->line_max = decoder->frame.width; | |
| mailbox->chunk_size = 16; | |
| mailbox->pitch = pitch; | |
| workunits = (mailbox->line_max + mailbox->chunk_size-1)/mailbox->chunk_size; | |
| mailbox->jobType = JOB_TYPE_WARP_BLURV; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| } | |
| #else // not threading | |
| { | |
| //geomesh_cache_init_bilinear(decoder->mesh); //bad | |
| geomesh_cache_init_bilinear_range(decoder->mesh, 0, decoder->frame.height); //good | |
| geomesh_apply_bilinear(decoder->mesh, (unsigned char *)output, (unsigned char *)decoder->lens_correct_buffer, 0, decoder->frame.height); | |
| } | |
| #endif | |
| memcpy(output, decoder->lens_correct_buffer, pitch * decoder->frame.height); | |
| /* | |
| if(lens_correct_buffer) | |
| #if _ALLOCATOR | |
| Free(decoder->allocator, lens_correct_buffer); | |
| #else | |
| MEMORY_ALIGNED_FREE(lens_correct_buffer); | |
| #endif | |
| geomesh_destroy(mesh); | |
| */ | |
| } | |
// Blank ("mask") the parts of the output frame that fall outside the lens
// region given by cfhddata->LensXmin/LensXmax/LensYmin/LensYmax (each a
// 0..1 fraction of the frame).  RGB outputs are cleared to zero; packed
// 8-bit YUV outputs are filled with video black in the byte order of the
// specific format.  Does nothing unless mesh-based lens processing
// (doMesh) is enabled and the region is a proper sub-rectangle.
void MaskFrame(DECODER *decoder, uint8_t *output, int pitch, int output_format)
{
	int x, y, width, height;
	int minY, maxY;
	int minX, maxX;
	CFHDDATA *cfhddata = &decoder->cfhddata;
	uint8_t *line = output;
	uint32_t fillA = 0;		// first byte of each two-byte fill pair
	uint32_t fillB = 0;		// second byte of each two-byte fill pair
	int bitsize = 8;		// component size; only YU64 switches this to 16

	if (!cfhddata->doMesh) return;

	width = decoder->frame.width;	// (unused below; horizontal bounds use pitch instead)
	height = decoder->frame.height;

	// An all-zero region or a full-frame (0,0)-(1,1) region means "no mask".
	if (decoder->cfhddata.LensYmin == 0.0 && decoder->cfhddata.LensXmin == 0.0 && decoder->cfhddata.LensYmax == 0.0 && decoder->cfhddata.LensXmax == 0.0) return;
	if (decoder->cfhddata.LensYmin == 0.0 && decoder->cfhddata.LensXmin == 0.0 && decoder->cfhddata.LensYmax == 1.0 && decoder->cfhddata.LensXmax == 1.0) return;

	// Vertical bounds are in rows.  Horizontal bounds are in BYTES (the
	// 0..1 fractions are applied to the pitch, not the pixel width) and
	// are rounded down to a multiple of 4 so fills start on 4-byte
	// boundaries.
	minY = (int)(decoder->cfhddata.LensYmin*(float)height);
	maxY = (int)(decoder->cfhddata.LensYmax*(float)height);
	minX = 0xfffc & (int)(decoder->cfhddata.LensXmin*(float)pitch);
	maxX = 0xfffc & (int)(decoder->cfhddata.LensXmax*(float)pitch);

	if (FORMATRGB(output_format))
	{
		line = output;

		// Top rows
		for (y = 0; y < minY; y++)
		{
			memset(line, 0, abs(pitch));	// abs() in case pitch is negative (inverted rows)
			line += pitch;
		}

		// Left and Right edges of middle rows
		if (maxX - minX != pitch)
		{
			for (; y < maxY; y++)
			{
				memset(line, 0, minX);
				memset(line + maxX, 0, pitch - maxX);
				line += pitch;
			}
		}

		// Bottom rows
		y = maxY;
		line = output + y*pitch;
		for (; y < height; y++)
		{
			memset(line, 0, abs(pitch));
			line += pitch;
		}
	}
	else
	{
		// Select the two-byte black pattern for the packed YUV layout.
		switch (output_format & 0x7fffffff)
		{
		case COLOR_FORMAT_YVYU:
		case COLOR_FORMAT_YUYV:
			fillA = 0x10;	// luma black
			fillB = 0x80;	// neutral chroma
			break;

		case COLOR_FORMAT_UYVY:
		case COLOR_FORMAT_2VUY:
			fillA = 0x80;	// neutral chroma
			fillB = 0x10;	// luma black
			break;

		case COLOR_FORMAT_YU64:
			fillA = 0x8000;
			fillB = 0x1000;
			bitsize = 16;
			break;
		}
	}

	// 8-bit fill pass.  Note this also runs after the RGB branch above
	// (fillA/fillB are still zero there, so it rewrites the same regions
	// with zeros -- redundant work but the same result).
	// NOTE(review): YU64 sets bitsize to 16 and no 16-bit fill path exists
	// below, so YU64 frames are currently left unmasked -- confirm intent.
	if (bitsize == 8)
	{
		line = output;

		// Top rows
		for (y = 0; y < minY; y++)
		{
			for (x = 0; x < pitch; x += 2)
			{
				line[x] = fillA;
				line[x + 1] = fillB;
			}
			line += pitch;
		}

		// Left and Right edges of middle rows
		if (maxX - minX != pitch)
		{
			for (; y < maxY; y++)
			{
				for (x = 0; x < minX; x += 2)
				{
					line[x] = fillA;
					line[x + 1] = fillB;
				}
				for (x = maxX; x < pitch; x += 2)
				{
					line[x] = fillA;
					line[x + 1] = fillB;
				}
				line += pitch;
			}
		}

		// Bottom rows
		y = maxY;
		line = output + y*pitch;
		for (; y < height; y++)
		{
			for (x = 0; x < pitch; x += 2)
			{
				line[x] = fillA;
				line[x + 1] = fillB;
			}
			line += pitch;
		}
	}
}
| #endif //#if WARPSTUFF | |
// Convert the decoder's working image (local_output / local_pitch, in
// decoder->StereoBufferFormat) into the caller's buffer (output / pitch,
// in output_format), applying stereo/3D orientation and framing
// corrections (vertical shift, rotation/keystone approximation, zoom,
// flips) along the way.  channel_offset is the byte distance between the
// two stereo channels inside the local buffer; a negative value marks
// swapped channels and is normalized to its magnitude here.
void ConvertLocalToOutput(DECODER *decoder, uint8_t *output, int pitch, int output_format, uint8_t *local_output, int local_pitch, int channel_offset)
{
	uint8_t *local_output_double = local_output;
	//Frame_Region emptyFrameMask = {0};

	// When a stereo buffer exists the working image lives there instead of
	// the caller-provided local buffer.
	if(decoder->StereoBuffer)
		local_output_double = local_output = (uint8_t *)decoder->StereoBuffer;

	if(channel_offset < 0) // channel swapped
	{
		channel_offset = -channel_offset;
	}

	// If the internal format and the requested output format disagree on
	// vertical orientation, read the local buffer bottom-up by starting at
	// the last row and negating the pitch.
	if(INVERTEDFORMAT(decoder->frame.format) != INVERTEDFORMAT(output_format))
	{
		local_output += local_pitch*(decoder->frame.height-1);
		if(decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC)
			local_output_double += local_pitch*(decoder->frame.height*decoder->channel_decodes-1);
		else
			local_output_double = local_output;
		local_pitch = -local_pitch;
	}

	// Flipped/inverted outputs need offsets applied in the opposite
	// direction; record that for the correction passes.
	if(FLIPCOLORS(output_format) || output_format & 0x80000000)
	{
		decoder->cfhddata.InvertOffset = 1;
	}
	else
	{
		decoder->cfhddata.InvertOffset = 0;
	}

	decoder->frame.format = output_format;
	//decoder->frame.colorspace = COLOR_SPACE_CG_601;

#if _THREADED
	{
		WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;
		int workunits;

#if _DELAY_THREAD_START
		if(decoder->worker_thread.pool.thread_count == 0)
		{
			CreateLock(&decoder->worker_thread.lock);

			// Initialize the pool of transform worker threads
			ThreadPoolCreate(&decoder->worker_thread.pool,
							decoder->thread_cntrl.capabilities >> 16/*cpus*/,
							WorkerThreadProc,
							decoder);
		}
#endif

		// Run the vertical (per-column) 3D correction pass only when at
		// least one orientation or framing parameter is active.
		if( ((decoder->cfhddata.process_path_flags & PROCESSING_ORIENTATION) &&
			(decoder->cfhddata.channel[0].FrameAutoZoom * decoder->cfhddata.channel[1].FrameDiffZoom != 1.0 ||
			decoder->cfhddata.channel[1].FrameKeyStone ||
			decoder->cfhddata.channel[1].VerticalOffset ||
			decoder->cfhddata.channel[1].RotationOffset ||
			decoder->cfhddata.channel[1].FrameTilt ||
			decoder->cfhddata.channel[0].FrameAutoZoom / decoder->cfhddata.channel[2].FrameDiffZoom != 1.0 ||
			decoder->cfhddata.channel[2].FrameKeyStone ||
			decoder->cfhddata.channel[2].VerticalOffset ||
			decoder->cfhddata.channel[2].RotationOffset ||
			decoder->cfhddata.channel[2].FrameTilt))
			||
			((decoder->cfhddata.process_path_flags & PROCESSING_FRAMING) &&
			(decoder->cfhddata.FrameOffsetY ||
			decoder->cfhddata.FrameOffsetR ||
//			decoder->cfhddata.FrameOffsetX ||  ||
			decoder->cfhddata.FrameHScale != 1.0 ||
			decoder->cfhddata.FrameHDynamic != 1.0 ||
			decoder->cfhddata.channel[1].FrameZoom != 1.0 ||
			decoder->cfhddata.channel[2].FrameZoom != 1.0) ))
		{
			//int x;
			int xbytes, xstep;
			//uint8_t *base = local_output;
			int width, height, chunk_size;
			int fine_vertical = 0;

			width = decoder->frame.width;
			height = decoder->frame.height;

			// Row length in bytes and per-job column step for the current
			// working-buffer pixel format.
			switch(decoder->StereoBufferFormat)
			{
			case DECODED_FORMAT_RGB32:
				xbytes = width*4;
				xstep = 16;
				break;
			case DECODED_FORMAT_RGB24:
				xbytes = width*3;
				xstep = 16;
				break;
			case DECODED_FORMAT_YUYV:
				xbytes = width*2;
				xstep = 16;
				break;
			case DECODED_FORMAT_W13A:
			case DECODED_FORMAT_RG64:
				xbytes = width*8;
				xstep = 32;
				break;
			case DECODED_FORMAT_WP13:
			case DECODED_FORMAT_RG48:
				xbytes = width*6;
				xstep = 32;
				break;
			default:
				assert(0);
				break;
			}

			// With no rotation/keystone anywhere, columns can be processed
			// in chunks of 8.  Otherwise process column-by-column, and for
			// larger angles additionally shrink xstep to one pixel
			// (fine_vertical) for finer vertical resampling.
			if(!(decoder->cfhddata.process_path_flags & (PROCESSING_ORIENTATION|PROCESSING_FRAMING)) ||
				(decoder->cfhddata.channel[1].RotationOffset == 0.0 && decoder->cfhddata.channel[1].FrameKeyStone == 0.0 &&
				decoder->cfhddata.channel[2].RotationOffset == 0.0 && decoder->cfhddata.channel[2].FrameKeyStone == 0.0 &&
				decoder->cfhddata.FrameOffsetR == 0.0))
			{
				chunk_size = 8;
			}
			else
			{
				chunk_size = 1;

				if((fabs(decoder->cfhddata.channel[1].RotationOffset) +
					fabs(decoder->cfhddata.channel[1].FrameKeyStone*0.2) +
					fabs(decoder->cfhddata.FrameOffsetR)) > 0.015 ||
					(fabs(decoder->cfhddata.channel[2].RotationOffset) +
					fabs(decoder->cfhddata.channel[2].FrameKeyStone*0.2) +
					fabs(decoder->cfhddata.FrameOffsetR)) > 0.015)
				{
					// One-pixel column step per format.
					switch(decoder->StereoBufferFormat)
					{
					case DECODED_FORMAT_RGB32:
						xstep = 4;
						break;
					case DECODED_FORMAT_RGB24:
						xstep = 3;
						break;
					case DECODED_FORMAT_YUYV:
						xstep = 4;
						break;
					case DECODED_FORMAT_W13A:
					case DECODED_FORMAT_RG64:
						xstep = 8;
						break;
					case DECODED_FORMAT_WP13:
					case DECODED_FORMAT_RG48:
					default:
						xstep = 6;
						break;
					}
					fine_vertical = 1;
				}
			}

			// Interlaced 4:2:2 sources at full or half-horizontal size are
			// corrected one field at a time (double pitch, two passes) so
			// the two fields are never resampled against each other.
			if( decoder->codec.encoded_format == ENCODED_FORMAT_YUV_422 &&
				(decoder->frame.resolution == DECODED_RESOLUTION_FULL ||
				decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) &&
				decoder->codec.progressive == false)
			{
				int interlaced_pitch = local_pitch * 2;
				uint8_t *field2_output = local_output + local_pitch;

				// Post a message to the mailbox (field 1)
				mailbox->local_output = local_output;
				mailbox->local_pitch = interlaced_pitch;
				mailbox->channel_offset = channel_offset;
				memcpy(&mailbox->info, &decoder->frame, sizeof(FRAME_INFO));
				mailbox->info.height >>= 1;		// each field is half the frame height
				mailbox->line_max = (xbytes + xstep-1)/xstep;
				mailbox->chunk_size = chunk_size;
				mailbox->fine_vertical = fine_vertical;
				mailbox->jobType = JOB_TYPE_VERTICAL_3D; // 3d work -- vertical

				workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size;

				// Set the work count to the number of rows to process
				ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);

				// Start the transform worker threads
				ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

				// Wait for all of the worker threads to finish
				ThreadPoolWaitAllDone(&decoder->worker_thread.pool);

				// Post a message to the mailbox (field 2)
				mailbox->local_output = field2_output;
				mailbox->local_pitch = interlaced_pitch;
				mailbox->channel_offset = channel_offset;
				memcpy(&mailbox->info, &decoder->frame, sizeof(FRAME_INFO));
				mailbox->info.height >>= 1;
				mailbox->chunk_size = chunk_size;
				mailbox->line_max = (xbytes + xstep-1)/xstep;
				mailbox->fine_vertical = fine_vertical;
				mailbox->jobType = JOB_TYPE_VERTICAL_3D; // 3d work -- vertical

				workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size;

				// Set the work count to the number of rows to process
				ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);

				// Start the transform worker threads
				ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

				// Wait for all of the worker threads to finish
				ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
			}
			else
			{
				//TODO Lens correct here.
				//call JOB_TYPE_VERTICAL_3D then (or lens correction equivalent.)
				// JOB_TYPE_HORIZONTAL_3D
				//before doing any offset and rotation corrections.

				if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) //HACK //DAN20110129
					width /= 2;

				// Post a message to the mailbox
				mailbox->local_output = local_output;
				mailbox->local_pitch = local_pitch;
				mailbox->channel_offset = channel_offset;
				memcpy(&mailbox->info, &decoder->frame, sizeof(FRAME_INFO));
				mailbox->chunk_size = chunk_size;
				mailbox->line_max = (xbytes + xstep-1)/xstep;
				mailbox->fine_vertical = fine_vertical;
				mailbox->jobType = JOB_TYPE_VERTICAL_3D; // 3d work -- vertical

				workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size;

				// Set the work count to the number of rows to process
				ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);

				// Start the transform worker threads
				ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

				// Wait for all of the worker threads to finish
				ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
			}
		}

		// Horizontal pass: 3D blend plus horizontal/vertical flips into the
		// caller's output buffer.  The workers may set
		// decoder->doVerticalFilter to request the sharpen pass below.
		// Post a message to the mailbox
		mailbox->output = output;
		mailbox->pitch = pitch;
		mailbox->local_output = local_output;
		mailbox->local_pitch = local_pitch;
		mailbox->channel_offset = channel_offset;
		memcpy(&mailbox->info, &decoder->frame, sizeof(FRAME_INFO));
		mailbox->chunk_size = 16;
		mailbox->line_max = decoder->frame.height;
		if(decoder->channel_mix_half_res == 1)
			mailbox->line_max *= 2;
		workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size;
		decoder->doVerticalFilter = 0;
		mailbox->jobType = JOB_TYPE_HORIZONAL_3D; // 3d work && horizontal and vertical flips

		// Set the work count to the number of rows to process
		ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);

		// Start the transform worker threads
		ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

		// Wait for all of the worker threads to finish
		ThreadPoolWaitAllDone(&decoder->worker_thread.pool);

		if(decoder->doVerticalFilter)
		{
			// Optional sharpen pass requested by the horizontal pass.
			// Post a message to the mailbox
			mailbox->output = output;
			mailbox->pitch = pitch;
			mailbox->local_output = local_output_double;
			mailbox->local_pitch = local_pitch;
			mailbox->channel_offset = channel_offset;
			memcpy(&mailbox->info, &decoder->frame, sizeof(FRAME_INFO));
			mailbox->chunk_size = 16;
			mailbox->line_max = decoder->frame.height;
			if(decoder->channel_decodes == 2 && decoder->channel_blend_type == 0)
				mailbox->line_max *= 2;
			if(decoder->channel_mix_half_res == 1)
				mailbox->line_max *= 2;
			workunits = (mailbox->line_max + mailbox->chunk_size - 1) / mailbox->chunk_size;
			mailbox->jobType = JOB_TYPE_SHARPEN; // 3d work && horizontal and vertical flips

			// Set the work count to the number of rows to process
			ThreadPoolSetWorkCount(&decoder->worker_thread.pool, workunits);

			// Start the transform worker threads
			ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

			// Wait for all of the worker threads to finish
			ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
		}
	}
#else
	// NOTE(review): this single-threaded fallback appears bit-rotted: it
	// references emptyFrameMask, which is commented out at the top of this
	// function, and the large if() below has unbalanced parentheses --
	// confirm it still compiles before enabling a non-_THREADED build.
	{
		int y,width, height;
		uint8_t scratch[4096*16];
		int scratchremain = 4096*16;
		int ymin = 0, ymax;

		width = decoder->frame.width;
		height = decoder->frame.height;
		ymax = height;

		// Restrict processing to the frame-mask rows when one is set.
		if((decoder->cfhddata.process_path_flags & PROCESSING_FRAMING) &&
			memcmp(&decoder->cfhddata.channel[0].FrameMask, &emptyFrameMask, 32))
		{
			ymin = (float)height * decoder->cfhddata.channel[0].FrameMask.topLftY;
			ymax = (float)height * decoder->cfhddata.channel[0].FrameMask.botLftY;
		}

		if( ((decoder->cfhddata.process_path_flags & PROCESSING_ORIENTATION) &&
			(decoder->cfhddata.channel[0].FrameAutoZoom * decoder->cfhddata.channel[1].FrameDiffZoom != 1.0 ||
			decoder->cfhddata.channel[1].FrameKeyStone ||
			decoder->cfhddata.channel[1].VerticalOffset ||
			decoder->cfhddata.channel[1].RotationOffset ||
			decoder->cfhddata.channel[0].FrameAutoZoom / decoder->cfhddata.channel[2].FrameDiffZoom != 1.0 ||
			decoder->cfhddata.channel[2].FrameKeyStone ||
			decoder->cfhddata.channel[2].VerticalOffset ||
			decoder->cfhddata.channel[2].RotationOffset))
			||
			((decoder->cfhddata.process_path_flags & PROCESSING_FRAMING) &&
			(decoder->cfhddata.FrameOffsetY ||
			decoder->cfhddata.FrameOffsetR ||
			decoder->cfhddata.FrameOffsetX ||
			decoder->cfhddata.FrameHScale != 1.0 ||
			decoder->cfhddata.FrameHDynamic != 1.0 ||
			decoder->cfhddata.channel[1].FrameZoom != 1.0 ||
			decoder->cfhddata.channel[2].FrameZoom != 1.0))
		{
			int x,xbytes, xstep;
			uint8_t *base = local_output;
			float voffsetstep;
			float voffset = decoder->cfhddata.channel[1].VerticalOffset;
			float roffset = decoder->cfhddata.channel[1].RotationOffset;
			float voffset1, voffset2;
			float voffsetstep1, voffsetstep2;
			int channel_flip = decoder->cfhddata.channel_flip;
			int aspectx,aspecty;
			float aspectfix;

			GetDisplayAspectRatio(decoder, &aspectx, &aspecty);
			aspectfix = (float)(aspectx*aspectx) / (float)(aspecty*aspecty);

			if(!(decoder->cfhddata.process_path_flags & PROCESSING_ORIENTATION))
			{
				voffset = roffset = 0;
			}
			if(!(decoder->cfhddata.process_path_flags & PROCESSING_IMAGEFLIPS))
			{
				channel_flip = 0;
			}

			if(decoder->cfhddata.process_path_flags & PROCESSING_FRAMING)
				voffset += decoder->cfhddata.FrameOffsetY;

			if(decoder->cfhddata.InvertOffset)
			{
				voffset = -voffset;
				roffset = -roffset;
			}

			switch(decoder->StereoBufferFormat)
			{
			case DECODED_FORMAT_RGB32:
				xbytes = width*4;
				xstep = 16;
				break;
			case DECODED_FORMAT_RGB24:
				xbytes = width*3;
				xstep = 16;
				break;
			case DECODED_FORMAT_YUYV:
				xbytes = width*2;
				xstep = 16;
				break;
			case DECODED_FORMAT_WP13:
			case DECODED_FORMAT_RG48:
			default:
				xbytes = width*6;
				xstep = 32;
				break;
			}

			//DAN20100923 -- simplified
			//voffset += roffset * (float)(width*width) / (float)(height*height) * 0.5;
			//voffsetstep = -roffset * (float)(width*width) / (float)(height*height) / (float)(xbytes/xstep);
			// Rotation appears to be approximated as a vertical shift that
			// ramps linearly across the columns, scaled by the display
			// aspect ratio.
			voffset += roffset * aspectfix * 0.5;
			voffsetstep = -roffset * aspectfix / (float)(xbytes/xstep);

			if(roffset == 0.0)
				xstep = xbytes;		// no rotation: one shift covers the whole row

			voffset1 = voffset2 = voffset;
			voffsetstep1 = voffsetstep2 = voffsetstep;

			// Mirror the shift direction/ramp for flipped channels.
			if(channel_flip & 0xf)
			{
				if(channel_flip & 2)
				{
					voffset1 = -voffset1;
					voffsetstep1 = -voffsetstep1;
				}
				if(channel_flip & 8)
				{
					voffset2 = -voffset2;
					voffsetstep2 = -voffsetstep2;
				}
				if(channel_flip & 1)
				{
					voffset1 += voffsetstep1*(xbytes/xstep);
					voffsetstep1 = -voffsetstep1;
				}
				if(channel_flip & 4)
				{
					voffset2 += voffsetstep2*(xbytes/xstep);
					voffsetstep2 = -voffsetstep2;
				}
			}

			// Apply the vertical shift one column band at a time.
			for(x=0; x<xbytes; x+=xstep)
			{
				if(decoder->channel_decodes == 1 && decoder->channel_current == 1) // Right only
				{
					RGB48VerticalShift(decoder, base, (unsigned short *)scratch,
						xstep, height, local_pitch, -voffset2);
				}
				else
				{
					RGB48VerticalShift(decoder, base, (unsigned short *)scratch,
						xstep, height, local_pitch, voffset1);
				}

				if(decoder->channel_decodes == 2)
				{
					uint8_t *bptr = base + channel_offset;
					RGB48VerticalShift(decoder, bptr, (unsigned short *)scratch,
						xstep, height, local_pitch, -voffset2);
				}
				base += xstep;
				voffset1 += voffsetstep1;
				voffset2 += voffsetstep2;
			}
		}

		if(decoder->channel_mix_half_res == 1)
			height *= 2;

		if(ymin)
		{
			memset(local_output, 0, abs(local_pitch)); // zero one line;
		}

		// Rows outside [ymin, ymax) are fed with a zero pitch -- presumably
		// making ProcessLine3D reuse the single zeroed line above as black
		// fill; confirm against ProcessLine3D.
		for(y=0; y<ymin; y++)
		{
			ProcessLine3D(decoder, scratch, scratchremain, output, pitch, local_output, 0, channel_offset, y, 0);
		}
		for(; y<ymax; y++)
		{
			ProcessLine3D(decoder, scratch, scratchremain, output, pitch, local_output, local_pitch, channel_offset, y, 0);
		}
		for(; y<height; y++)
		{
			ProcessLine3D(decoder, scratch, scratchremain, output, pitch, local_output, 0, channel_offset, y, 0);
		}
	}
#endif
}
| // Decode a sample from the input bitstream into the output frame buffer | |
| bool DecodeSample(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams, CFHDDATA *cfhddata) | |
| { | |
| //CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int subband_wavelet_index[] = {5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 1, 1, 1, 0, 0, 0}; | |
| int channel_decodes = 1; // 3D Work | |
| int channel_offset = 0; | |
| int channel_mask = 0; | |
| int channel_current = 0; | |
| //int wavelet_index; | |
| bool result = true; | |
| uint8_t *local_output = output; | |
| uint8_t *local_buffer = NULL; | |
| int local_pitch = pitch; | |
| int internal_format = decoder->frame.format; | |
| int output_format = decoder->frame.output_format; | |
| bool use_local_buffer = false; | |
| DECODER *local_decoder = decoder; | |
| //Frame_Region emptyFrameMask = {0}; | |
| Frame_Region emptyFrameMask = FRAME_REGION_INITIALIZER; | |
| int orig_width = decoder->frame.width; | |
| int orig_height = decoder->frame.height; | |
| decoder->local_output = local_output; // used for NV12 decodes. | |
| decoder->sample_uncompressed = 0; // set if a uncompressed sample is found. | |
| decoder->image_dev_only = 0; | |
| if(decoder->flags & (1<<3)) // This is an image development only decode. | |
| { | |
| decoder->sample_uncompressed = 1; | |
| decoder->image_dev_only = 1; | |
| decoder->codec.encoded_format = ENCODED_FORMAT_RGB_444; | |
| decoder->codec.unique_framenumber = 0; //What should this be? | |
| decoder->frame.white_point = 16; // how to we pass this in? | |
| decoder->uncompressed_chunk = (uint32_t *)input->lpCurrentBuffer; | |
| switch(output_format & 0x7fffffff) | |
| { | |
| case COLOR_FORMAT_RGB24: | |
| decoder->uncompressed_size = orig_width * orig_height * 3; | |
| break; | |
| case COLOR_FORMAT_RGB32: | |
| decoder->uncompressed_size = orig_width * orig_height * 4; | |
| break; | |
| case COLOR_FORMAT_RG48: | |
| case COLOR_FORMAT_WP13: | |
| decoder->uncompressed_size = orig_width * orig_height * 6; | |
| break; | |
| default: | |
| decoder->uncompressed_size = orig_width * orig_height * 6; | |
| assert(0); | |
| break; | |
| } | |
| } | |
| decoder->frame.alpha_Companded = 0; // reset this state. | |
| if(decoder->parallelDecoder) | |
| decoder->parallelDecoder->sample_uncompressed = 0; | |
| decoder->error = CODEC_ERROR_OKAY; | |
| input->error = BITSTREAM_ERROR_OKAY; | |
| // first time through encoded_format is not initized. | |
| if(input->nWordsUsed > 4096 && decoder->image_dev_only == 0) // an I-frame is needed | |
| { | |
| SAMPLE_HEADER header; | |
| BITSTREAM input2; | |
| InitBitstreamBuffer(&input2, input->lpCurrentWord, input->nWordsUsed, BITSTREAM_ACCESS_READ); | |
| memset(&header, 0, sizeof(SAMPLE_HEADER)); | |
| header.find_lowpass_bands = 2; // help finding the uncompressed flag | |
| if(ParseSampleHeader(&input2, &header)) | |
| { | |
| decoder->codec.encoded_format = header.encoded_format; | |
| decoder->sample_uncompressed = header.hdr_uncompressed; | |
| if(decoder->parallelDecoder) | |
| decoder->parallelDecoder->sample_uncompressed = header.hdr_uncompressed; | |
| } | |
| } | |
| if((uintptr_t)input->lpCurrentBuffer & 0x3) | |
| { | |
| if(decoder->aligned_sample_buffer == NULL) | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| decoder->aligned_sample_buffer = | |
| (uint8_t *)AllocAligned(allocator, (size_t)input->dwBlockLength, 16); | |
| #else | |
| decoder->aligned_sample_buffer = | |
| (uint8_t *)MEMORY_ALIGNED_ALLOC(input->dwBlockLength, 16); | |
| #endif | |
| memcpy(decoder->aligned_sample_buffer, input->lpCurrentBuffer, input->dwBlockLength); | |
| decoder->aligned_sample_buffer_size = input->dwBlockLength; | |
| } | |
| else | |
| { | |
| if ((size_t)input->dwBlockLength <= decoder->aligned_sample_buffer_size) | |
| { | |
| memcpy(decoder->aligned_sample_buffer, input->lpCurrentBuffer, input->dwBlockLength); | |
| } | |
| else | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| FreeAligned(decoder->allocator, decoder->aligned_sample_buffer); | |
| decoder->aligned_sample_buffer = | |
| (uint8_t *)AllocAligned(allocator, input->dwBlockLength, 16); | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->aligned_sample_buffer); | |
| decoder->aligned_sample_buffer = | |
| (uint8_t *)MEMORY_ALIGNED_ALLOC(input->dwBlockLength, 16); | |
| #endif | |
| memcpy(decoder->aligned_sample_buffer, input->lpCurrentBuffer, input->dwBlockLength); | |
| decoder->aligned_sample_buffer_size = input->dwBlockLength; | |
| } | |
| } | |
| input->lpCurrentBuffer = decoder->aligned_sample_buffer; | |
| input->lpCurrentWord = decoder->aligned_sample_buffer; | |
| } | |
| #if 0 // Test for missaligning the image data | |
| if(((int)input->lpCurrentBuffer&3) == 0) | |
| { | |
| int i; | |
| uint8_t *ptr = (uint8_t *)input->lpCurrentBuffer; | |
| int missaligned = 1; //2 or 3 | |
| for(i=input->dwBlockLength-1; i>=0; i--) | |
| ptr[i+missaligned] = ptr[missaligned]; | |
| input->lpCurrentBuffer = (uint8_t *)&ptr[missaligned]; | |
| input->lpCurrentWord = (uint8_t *)&ptr[missaligned]; | |
| } | |
| #endif | |
| //HACK | |
| // Unfortunately I need color matrix data deep within the codec for RT playback. | |
| if(cfhddata && cfhddata->MagicNumber == CFHDDATA_MAGIC_NUMBER) // valid input | |
| { | |
| if(decoder->cfhddata.MagicNumber != CFHDDATA_MAGIC_NUMBER) | |
| { | |
| //int size = cfhddata->size; | |
| size_t size = cfhddata->size; | |
| memset(&decoder->cfhddata, 0, sizeof(CFHDDATA)); | |
| if (size > sizeof(CFHDDATA)) { | |
| // Limit the size to the known structure | |
| size = sizeof(CFHDDATA); | |
| } | |
| memcpy(&decoder->cfhddata, cfhddata, size); | |
| } | |
| } | |
| else | |
| { | |
| unsigned short value; | |
| if(decoder->cfhddata.MagicNumber != CFHDDATA_MAGIC_NUMBER || decoder->cfhddata.size != sizeof(CFHDDATA)) | |
| { | |
| memset(&decoder->cfhddata, 0, sizeof(CFHDDATA)); | |
| decoder->cfhddata.MagicNumber = CFHDDATA_MAGIC_NUMBER; | |
| decoder->cfhddata.size = sizeof(CFHDDATA); | |
| if(decoder->image_dev_only) // For baseband image only corrections, initize the decoder with defaults | |
| { | |
| decoder->cfhddata.cfhd_subtype = 2; //RGB | |
| decoder->cfhddata.num_channels = 3; | |
| } | |
| else if(GetTuplet(input->lpCurrentBuffer, input->nWordsUsed, CODEC_TAG_INPUT_FORMAT, &value)) | |
| { | |
| if(value == COLOR_FORMAT_RG48) | |
| { | |
| decoder->cfhddata.cfhd_subtype = 2; //RGB | |
| decoder->cfhddata.num_channels = 3; | |
| } | |
| else if(value == COLOR_FORMAT_RG64) | |
| { | |
| decoder->cfhddata.cfhd_subtype = 3; //RGBA | |
| decoder->cfhddata.num_channels = 4; | |
| } | |
| else if(value > COLOR_FORMAT_BAYER && value < COLOR_FORMAT_BAYER_END) | |
| { | |
| unsigned int format = BAYER_FORMAT_RED_GRN; | |
| decoder->cfhddata.cfhd_subtype = 1; //BAYER | |
| decoder->cfhddata.bayer_format = format; // default to Red-Grn | |
| decoder->cfhddata.version = CFHDDATA_VERSION; | |
| } | |
| } | |
| } | |
| } | |
| OverrideCFHDDATA(decoder, input->lpCurrentBuffer, input->nWordsUsed); | |
| if(decoder->image_dev_only) // HACK we need to support 3D also. | |
| decoder->source_channels = 1; | |
| else | |
| decoder->source_channels = decoder->real_channels = SkipVideoChannel(decoder, input, 0); | |
| if(!decoder->basic_only && (decoder->cfhddata.MSChannel_type_value || decoder->cfhddata.MSCTV_Override)) | |
| { | |
| //int channels = 0; | |
| int channel_blend_type = BLEND_NONE; | |
| int channel_swapped_flags = 0; | |
| if(decoder->cfhddata.MSCTV_Override) | |
| { | |
| channel_mask = decoder->cfhddata.MSCTV_Override&0xff; | |
| channel_blend_type = ((decoder->cfhddata.MSCTV_Override>>8) & 0xff); | |
| channel_swapped_flags = ((decoder->cfhddata.MSCTV_Override>>16) & 0xffff); | |
| } | |
| else | |
| { | |
| channel_mask = decoder->cfhddata.MSChannel_type_value&0xff; | |
| channel_blend_type = ((decoder->cfhddata.MSChannel_type_value>>8) & 0xff); | |
| channel_swapped_flags = ((decoder->cfhddata.MSChannel_type_value>>16) & 0xffff); | |
| } | |
| if(channel_mask != 3) | |
| { | |
| channel_blend_type = BLEND_NONE; | |
| channel_swapped_flags = 0; | |
| } | |
| //if(channels >= 2) // even "mono" files need to be displayed as Stereo if a 3D mode is selected //DAN20090302 | |
| { | |
| if(channel_mask == 1 && decoder->source_channels >= 2) // Decode Left only | |
| { | |
| if(decoder->cfhddata.FramingFlags & 2) // channel swap | |
| { | |
| SkipVideoChannel(decoder, input, 2); // 3D work | |
| } | |
| } | |
| else if(channel_mask == 2 && decoder->source_channels >= 2) // Decode Right only | |
| { | |
| if(decoder->cfhddata.FramingFlags & 2) // channel swap | |
| { | |
| SkipVideoChannel(decoder, input, 1); // 3D work | |
| } | |
| else | |
| { | |
| //assume second channel decode | |
| SkipVideoChannel(decoder, input, 2); // 3D work | |
| } | |
| channel_current = 1; | |
| channel_decodes = 1; | |
| channel_blend_type = BLEND_NONE; | |
| channel_swapped_flags = 0; | |
| } | |
| else if(channel_mask == 2 && decoder->source_channels <= 1) // Decode 2D as Right channel | |
| { | |
| channel_current = 1; | |
| channel_decodes = 1; | |
| channel_blend_type = BLEND_NONE; | |
| channel_swapped_flags = 0; | |
| } | |
| else if((channel_mask&3) == 3) // A+B 3d work | |
| { | |
| channel_decodes = 2; | |
| decoder->channel_mix_half_res = 0; | |
| if(channel_blend_type != BLEND_NONE) | |
| { | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| { | |
| //if(decoder->frame.format == DECODED_FORMAT_W13A) | |
| // { | |
| // decoder->frame.format = internal_format = DECODED_FORMAT_W13A; | |
| // } | |
| //else | |
| //{ | |
| // decoder->frame.format = internal_format = DECODED_FORMAT_RG64; | |
| // } | |
| decoder->frame.format = internal_format = DECODED_FORMAT_RGB32; | |
| local_pitch = decoder->frame.width * 4; | |
| } | |
| else | |
| { | |
| decoder->frame.format = internal_format = DECODED_FORMAT_RGB24; | |
| local_pitch = decoder->frame.width * 3; //RGB24 | |
| } | |
| /* if(decoder->frame.resolution == DECODED_RESOLUTION_FULL && | |
| (output_format == DECODED_FORMAT_YUYV || | |
| output_format == DECODED_FORMAT_UYVY)) | |
| { | |
| if( channel_blend_type == BLEND_FREEVIEW || | |
| ((channel_blend_type == BLEND_STACKED_ANAMORPHIC || | |
| channel_blend_type == BLEND_SIDEBYSIDE_ANAMORPHIC || | |
| channel_blend_type == BLEND_LINE_INTERLEAVED) && decoder->frame.width > 1280)) | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_HALF; | |
| decoder->channel_mix_half_res = 1; | |
| decoder->frame.width /= 2; | |
| decoder->frame.height /= 2; | |
| local_pitch = (decoder->frame.width) * 3; //RGB24 | |
| } | |
| } */ | |
| } | |
| /* if(channel_blend_type == BLEND_STEREO_YUY2inRGBA) //YUY2 in RGBA | |
| { | |
| decoder->frame.format = internal_format = DECODED_FORMAT_YUYV; | |
| local_pitch = decoder->frame.width * 2; //YUY2 | |
| channel_offset = local_pitch * (decoder->frame.height); | |
| use_local_buffer = true; | |
| }*/ | |
| /* DAN20120316 FLAG3D_HALFRES broken if(decoder->frame.resolution == DECODED_RESOLUTION_FULL && channel_swapped_flags & FLAG3D_HALFRES && output_format != DECODED_FORMAT_W13A) | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_HALF; | |
| decoder->channel_mix_half_res = 1; | |
| decoder->frame.width /= 2; | |
| decoder->frame.height /= 2; | |
| local_pitch /= 2; | |
| } */ | |
| if( decoder->frame.resolution == DECODED_RESOLUTION_FULL && | |
| (channel_blend_type == BLEND_SIDEBYSIDE_ANAMORPHIC || channel_blend_type == BLEND_FREEVIEW)) | |
| { | |
| if(decoder->codec.encoded_format != ENCODED_FORMAT_BAYER) | |
| { | |
| if(decoder->sample_uncompressed) | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_HALF; | |
| decoder->channel_mix_half_res = 1; | |
| decoder->frame.width /= 2; | |
| decoder->frame.height /= 2; | |
| local_pitch /= 2; | |
| } | |
| else | |
| { | |
| if(decoder->preformatted_3D_type > BLEND_NONE) | |
| { | |
| // leave as is. | |
| } | |
| else if(FORMAT8BIT(output_format)) | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_HALF_HORIZONTAL; | |
| decoder->frame.width /= 2; | |
| local_pitch /= 2; | |
| } | |
| } | |
| } | |
| else | |
| { | |
| if(FORMAT8BIT(output_format)) | |
| decoder->frame.resolution = DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER; | |
| } | |
| //TODO int uncompressed = decoder->uncompressed_chunk && decoder->uncompressed_size && decoder->sample_uncompressed; | |
| } | |
| if(channel_blend_type >= BLEND_STACKED_ANAMORPHIC && channel_blend_type < BLEND_ANAGLYPH_RC)// stacked, side-by-side, fields, Onion, YUY2 | |
| { | |
| channel_offset = local_pitch * (decoder->frame.height); | |
| } | |
| else if(channel_blend_type >= BLEND_ANAGLYPH_RC) | |
| { | |
| /* if(channel_blend_type & 1 && channel_blend_type <= 21) // B&W Anaglyph | |
| { | |
| //B&W using YUYV | |
| decoder->frame.format = internal_format = DECODED_FORMAT_YUYV; | |
| local_pitch = decoder->frame.width * 2; //YUY2 | |
| }*/ | |
| channel_offset = local_pitch * (decoder->frame.height); | |
| use_local_buffer = true; | |
| } | |
| else if(channel_blend_type == BLEND_NONE) // double high | |
| { | |
| channel_offset = pitch * decoder->frame.height; | |
| } | |
| else | |
| { | |
| channel_blend_type = BLEND_STACKED_ANAMORPHIC; | |
| channel_offset = pitch * (decoder->frame.height/2); | |
| } | |
| // fields, stacked, etc, only works on full or half res. | |
| if (channel_blend_type > BLEND_NONE && channel_blend_type <= BLEND_LINE_INTERLEAVED && | |
| decoder->frame.resolution == DECODED_RESOLUTION_LOWPASS_ONLY) // thumbnail. | |
| { | |
| channel_decodes = 1; | |
| channel_blend_type = BLEND_NONE; | |
| channel_swapped_flags = 0; | |
| } | |
| if (channel_blend_type != BLEND_NONE && | |
| (output_format == DECODED_FORMAT_BYR1 || | |
| output_format == DECODED_FORMAT_BYR2 || | |
| output_format == DECODED_FORMAT_BYR3 || | |
| output_format == DECODED_FORMAT_BYR4 )) | |
| { | |
| channel_decodes = 1; | |
| channel_blend_type = BLEND_NONE; | |
| channel_swapped_flags = 0; | |
| } | |
| } | |
| } | |
| decoder->channel_decodes = channel_decodes; | |
| decoder->channel_blend_type = channel_blend_type; | |
| decoder->channel_swapped_flags = channel_swapped_flags; | |
| } | |
| else | |
| { | |
| decoder->channel_decodes = channel_decodes = 1; | |
| decoder->channel_blend_type = BLEND_NONE; | |
| decoder->channel_swapped_flags = 0; | |
| } | |
| if(cfhddata) // So the P-frames can know the bayerformat | |
| { | |
| //int size = cfhddata->size; | |
| size_t size = cfhddata->size; | |
| if (size > sizeof(CFHDDATA)) { | |
| size = sizeof(CFHDDATA); | |
| } | |
| memcpy(cfhddata, &decoder->cfhddata, size); | |
| } | |
| { | |
| bool doOrientation = true; | |
| bool doFraming = true; | |
| bool doBurins = true; | |
| bool doImageflips = true; | |
| bool doGhostBust = false; | |
| bool doPrimaries = true; | |
| int process_path_flags = decoder->cfhddata.process_path_flags; | |
| int process_path_flags_mask = decoder->cfhddata.process_path_flags_mask; | |
| if(decoder->basic_only) | |
| { | |
| doOrientation = false; | |
| doFraming = false; | |
| doBurins = false; | |
| doImageflips = false; | |
| doPrimaries = false; | |
| } | |
| else | |
| { | |
| if(decoder->cfhddata.process_path_flags_mask) | |
| { | |
| //DAN20101007 -- | |
| if(process_path_flags == 0) | |
| decoder->cfhddata.process_path_flags = process_path_flags = decoder->cfhddata.process_path_flags_mask; | |
| process_path_flags &= decoder->cfhddata.process_path_flags_mask; | |
| if(process_path_flags_mask & PROCESSING_ACTIVE2) | |
| { | |
| if(!(process_path_flags_mask & PROCESSING_ORIENTATION)) | |
| doOrientation = false; | |
| if(!(process_path_flags_mask & PROCESSING_FRAMING)) | |
| doFraming = false; | |
| if(!(process_path_flags_mask & PROCESSING_BURNINS)) | |
| doBurins = false; | |
| if(!(process_path_flags_mask & PROCESSING_IMAGEFLIPS)) | |
| doImageflips = false; | |
| } | |
| if(!(process_path_flags_mask & PROCESSING_COLORMATRIX)) | |
| doPrimaries = false; | |
| } | |
| if(process_path_flags & PROCESSING_ACTIVE2) | |
| { | |
| if(!(process_path_flags & PROCESSING_ORIENTATION)) | |
| doOrientation = false; | |
| if(!(process_path_flags & PROCESSING_FRAMING)) | |
| doFraming = false; | |
| if(!(process_path_flags & PROCESSING_BURNINS)) | |
| doBurins = false; | |
| if(!(process_path_flags & PROCESSING_IMAGEFLIPS)) | |
| doImageflips = false; | |
| if(!(process_path_flags & PROCESSING_COLORMATRIX)) | |
| doPrimaries = false; | |
| } | |
| } | |
| if(doOrientation) | |
| process_path_flags |= PROCESSING_ORIENTATION; | |
| if(doFraming) | |
| process_path_flags |= PROCESSING_FRAMING; | |
| if(doBurins) | |
| process_path_flags |= PROCESSING_BURNINS; | |
| if(doImageflips) | |
| process_path_flags |= PROCESSING_IMAGEFLIPS; | |
| if(doPrimaries) | |
| process_path_flags |= PROCESSING_COLORMATRIX; | |
| if(decoder->channel_swapped_flags & FLAG3D_GHOSTBUST) | |
| { | |
| if(decoder->ghost_bust_left || decoder->ghost_bust_right) | |
| { | |
| doGhostBust = true; | |
| } | |
| } | |
| decoder->cfhddata.process_path_flags = process_path_flags; | |
| if((!decoder->basic_only && | |
| (doOrientation && ( decoder->cfhddata.channel[0].FloatingWindowMaskL || | |
| decoder->cfhddata.channel[0].FloatingWindowMaskR || | |
| decoder->cfhddata.channel[0].FrameKeyStone || | |
| decoder->cfhddata.channel[0].FrameTilt || | |
| decoder->cfhddata.channel[0].HorizontalOffset || | |
| decoder->cfhddata.channel[0].VerticalOffset || | |
| decoder->cfhddata.channel[0].RotationOffset || | |
| decoder->cfhddata.channel[1].FloatingWindowMaskL || | |
| decoder->cfhddata.channel[1].FloatingWindowMaskR || | |
| decoder->cfhddata.channel[1].FrameKeyStone || | |
| decoder->cfhddata.channel[1].FrameTilt || | |
| decoder->cfhddata.channel[1].HorizontalOffset || | |
| decoder->cfhddata.channel[1].VerticalOffset || | |
| decoder->cfhddata.channel[1].RotationOffset || | |
| decoder->cfhddata.channel[0].FrameAutoZoom * decoder->cfhddata.channel[1].FrameDiffZoom != 1.0 || | |
| decoder->cfhddata.channel[2].FloatingWindowMaskL || | |
| decoder->cfhddata.channel[2].FloatingWindowMaskR || | |
| decoder->cfhddata.channel[2].FrameKeyStone || | |
| decoder->cfhddata.channel[2].FrameTilt || | |
| decoder->cfhddata.channel[2].HorizontalOffset || | |
| decoder->cfhddata.channel[2].VerticalOffset || | |
| decoder->cfhddata.channel[2].RotationOffset || | |
| decoder->cfhddata.channel[0].FrameAutoZoom / decoder->cfhddata.channel[2].FrameDiffZoom != 1.0))) | |
| || | |
| (doPrimaries && ( decoder->cfhddata.channel[0].user_blur_sharpen != 0.0 || | |
| decoder->cfhddata.channel[1].user_blur_sharpen != 0.0 || | |
| decoder->cfhddata.channel[2].user_blur_sharpen != 0.0)) | |
| || | |
| (doFraming && ( decoder->cfhddata.channel[0].user_vignette_start != 0.0 || | |
| decoder->cfhddata.channel[1].user_vignette_start != 0.0 || | |
| decoder->cfhddata.channel[2].user_vignette_start != 0.0)) | |
| || | |
| (doFraming && ( memcmp(&decoder->cfhddata.channel[0].FrameMask, &emptyFrameMask, 32) || | |
| decoder->cfhddata.FrameOffsetX || | |
| decoder->cfhddata.FrameOffsetY || | |
| decoder->cfhddata.FrameOffsetR || | |
| decoder->cfhddata.FrameHScale != 1.0 || | |
| decoder->cfhddata.FrameHDynamic != 1.0 || | |
| decoder->cfhddata.channel[1].FrameZoom != 1.0 || | |
| decoder->cfhddata.channel[2].FrameZoom != 1.0)) | |
| || | |
| (doGhostBust && (decoder->channel_blend_type == BLEND_NONE) && (channel_decodes == 2)) | |
| || | |
| (doImageflips && decoder->cfhddata.channel_flip) | |
| || | |
| (decoder->preformatted_3D_type == BLEND_STACKED_ANAMORPHIC) || | |
| (decoder->preformatted_3D_type == BLEND_SIDEBYSIDE_ANAMORPHIC) || | |
| (decoder->channel_blend_type && decoder->frame.resolution == DECODED_RESOLUTION_QUARTER) || // 3D modes generally don't work in quarter res -- this prevents crashes. | |
| ( ((decoder->frame.width+7)/8)*8 != decoder->frame.width || (channel_decodes > 1 && decoder->channel_blend_type != BLEND_NONE) || | |
| decoder->sample_uncompressed) || | |
| (decoder->cfhddata.doMesh) | |
| ) | |
| { | |
| if( output_format == DECODED_FORMAT_BYR1 || | |
| output_format == DECODED_FORMAT_BYR2 || | |
| output_format == DECODED_FORMAT_BYR3 || | |
| output_format == DECODED_FORMAT_BYR4 ) | |
| { | |
| // no manipulation should be applied | |
| } | |
| else | |
| { | |
| use_local_buffer = true; | |
| local_pitch = ((decoder->frame.width+7)/8)*8 * 6; //RGB48 | |
| if(decoder->image_dev_only) | |
| { | |
| decoder->frame.white_point = 13; | |
| decoder->frame.format = internal_format = DECODED_FORMAT_WP13; | |
| } | |
| else if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| { | |
| decoder->frame.white_point = 13; | |
| decoder->frame.format = internal_format = DECODED_FORMAT_W13A; | |
| local_pitch = ((decoder->frame.width+7)/8)*8 * 8; | |
| } | |
| else | |
| { | |
| decoder->frame.white_point = 13; | |
| decoder->frame.format = internal_format = DECODED_FORMAT_WP13; | |
| } | |
| if( decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL || | |
| decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) | |
| { | |
| local_pitch *= 2; // need horizontal room to make 3D side by side frame | |
| } | |
| /* | |
| if(output_format == DECODED_FORMAT_WP13 || output_format == DECODED_FORMAT_W13A) | |
| { | |
| // preserve HDR | |
| decoder->frame.format = internal_format = output_format;//DECODED_FORMAT_WP13; // HDR output | |
| if(output_format == DECODED_FORMAT_W13A) | |
| local_pitch = decoder->frame.width * 8; | |
| } | |
| else | |
| { | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| { | |
| decoder->frame.format = internal_format = DECODED_FORMAT_RG64; | |
| local_pitch = decoder->frame.width * 8; | |
| } | |
| else | |
| { | |
| decoder->frame.format = internal_format = DECODED_FORMAT_RG48; | |
| } | |
| }*/ | |
| channel_offset = local_pitch * (decoder->frame.height); | |
| } | |
| } | |
| } | |
| if(output_format == DECODED_FORMAT_BYR4 && decoder->cfhddata.encode_curve_preset == 0) | |
| { | |
| if(decoder->BYR4LinearRestore == NULL) | |
| { | |
| int j,val; | |
| int encode_curve_type = decoder->cfhddata.encode_curve >> 16; | |
| //int encode_curve_neg = encode_curve_type & CURVE_TYPE_NEGATIVE; | |
| float encode_curvebase; | |
| if(encode_curve_type) //1 or 2 | |
| { | |
| if(encode_curve_type & CURVE_TYPE_EXTENDED) | |
| encode_curvebase = (float)(decoder->cfhddata.encode_curve & 0xffff); // use all 16-bits for larger log bases | |
| else | |
| encode_curvebase = (float)((decoder->cfhddata.encode_curve >> 8) & 0xff) / (float)(decoder->cfhddata.encode_curve & 0xff); | |
| } | |
| else | |
| { | |
| encode_curve_type = CURVE_TYPE_LOG; | |
| encode_curvebase = 90.0; | |
| } | |
| #if _ALLOCATOR | |
| decoder->BYR4LinearRestore = (unsigned short *)AllocAligned(decoder->allocator,16384*2, 16); | |
| #else | |
| decoder->BYR4LinearRestore = (unsigned short *)MEMORY_ALIGNED_ALLOC(16384*2, 16); | |
| #endif | |
| for(j=0; j<16384; j++) //0 to 1 | |
| { | |
| switch(encode_curve_type & CURVE_TYPE_MASK) | |
| { | |
| case CURVE_TYPE_LOG: | |
| val = (int)(CURVE_LOG2LIN((float)j/16384.0f, | |
| (float)encode_curvebase) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_GAMMA: | |
| val = (int)(CURVE_GAM2LIN((float)j/16384.0f, | |
| (float)encode_curvebase) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_CINEON: | |
| val = (int)(CURVE_CINEON2LIN((float)j/16384.0f, | |
| (float)encode_curvebase) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_CINE985: | |
| val = (int)(CURVE_CINE9852LIN((float)j/16384.0f, | |
| (float)encode_curvebase) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_PARA: | |
| val = (int)(CURVE_PARA2LIN((float)j/16384.0f, | |
| (int)((decoder->cfhddata.encode_curve >> 8) & 0xff), (int)(decoder->cfhddata.encode_curve & 0xff)) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_CSTYLE: | |
| val = (int)(CURVE_CSTYLE2LIN((float)j/16384.0f, | |
| (int)((decoder->cfhddata.encode_curve >> 8) & 0xff)) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_SLOG: | |
| val = (int)(CURVE_SLOG2LIN((float)j/16384.0f) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_LOGC: | |
| val = (int)(CURVE_LOGC2LIN((float)j/16384.0f) * 65535.0f); | |
| break; | |
| case CURVE_TYPE_LINEAR: | |
| default: | |
| val = j; | |
| break; | |
| } | |
| if(val < 0) val = 0; | |
| if(val > 65535) val = 65535; | |
| decoder->BYR4LinearRestore[j] = val; | |
| } | |
| } | |
| } | |
| //DAN20120319 - removed | |
| /*if(decoder->channel_mix_half_res) //decoding half but scaling to double the output size | |
| { | |
| local_pitch *= 2; | |
| channel_offset = local_pitch * (decoder->frame.height*2); | |
| }*/ | |
| if(use_local_buffer == true) // need buffer for anaglyph and other 3D presentation formats | |
| { | |
| int stereoframesize = channel_offset * channel_decodes/*stacked frames*/; | |
| if(decoder->source_channels == 1 && decoder->preformatted_3D_type == BLEND_NONE) | |
| stereoframesize = channel_offset; | |
| if(channel_decodes == 1 && decoder->preformatted_3D_type != BLEND_NONE) | |
| stereoframesize = channel_offset * 2; | |
| if(channel_decodes == 2 && decoder->source_channels == 1 && decoder->channel_blend_type != BLEND_NONE) | |
| stereoframesize = channel_offset * 2; | |
| if(decoder->StereoBuffer==NULL || decoder->StereoBufferSize < stereoframesize) | |
| { | |
| #if _ALLOCATOR | |
| if(decoder->StereoBuffer) | |
| { | |
| FreeAligned(decoder->allocator, decoder->StereoBuffer); | |
| decoder->StereoBuffer = NULL; | |
| } | |
| decoder->StereoBuffer = (PIXEL16U *)AllocAligned(decoder->allocator, stereoframesize+256, 16); //DAN20130517 add 256: at 2.7K half res we write past the end of the buffer during zoom; root cause unknown. | |
| #else | |
| if(decoder->StereoBuffer) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->StereoBuffer); | |
| decoder->StereoBuffer = NULL; | |
| } | |
| decoder->StereoBuffer = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(stereoframesize+256, 16); //DAN20130517 add 256: at 2.7K half res we write past the end of the buffer during zoom; root cause unknown. | |
| #endif | |
| assert(decoder->StereoBuffer != NULL); | |
| if (! (decoder->StereoBuffer != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->StereoBufferSize = stereoframesize; | |
| } | |
| decoder->StereoBufferFormat = internal_format; | |
| local_buffer = (uint8_t *)decoder->StereoBuffer; | |
| local_output = local_buffer; | |
| } | |
| DecodeEntropyInit(decoder); | |
| //swapped -- Maybe useful for double height decodes. | |
| /* if(channel_decodes == 2 && channel_swapped_flags & FLAG3D_SWAPPED) | |
| { | |
| local_output += channel_offset; | |
| channel_offset = -channel_offset; | |
| }*/ | |
| decoder->use_local_buffer = use_local_buffer ? 1 : 0; | |
| if(channel_decodes == 2 && decoder->parallelDecoder == NULL && decoder->source_channels > 1) | |
| { | |
| int encoded_width = decoder->frame.width; | |
| int encoded_height = decoder->frame.height; | |
| if (decoder->frame.resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| // Compute the encoded dimensions from the frame dimensions | |
| encoded_width *= 2; | |
| encoded_height *= 2; | |
| } | |
| else if (decoder->frame.resolution == DECODED_RESOLUTION_QUARTER) | |
| { | |
| // Compute the encoded dimensions from the frame dimensions | |
| encoded_width *= 4; | |
| encoded_height *= 4; | |
| } | |
| else if (decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| { | |
| // Compute the encoded dimensions from the frame dimensions | |
| encoded_width *= 2; | |
| } | |
| else if (decoder->frame.resolution == DECODED_RESOLUTION_HALF_VERTICAL) | |
| { | |
| // Compute the encoded dimensions from the frame dimensions | |
| encoded_height *= 2; | |
| } | |
| #if _ALLOCATOR | |
| decoder->parallelDecoder = (DECODER *)Alloc(decoder->allocator, sizeof(DECODER)); | |
| if(decoder->parallelDecoder) | |
| { | |
| memset(decoder->parallelDecoder, 0, sizeof(DECODER)); | |
| DecodeInit(decoder->allocator, decoder->parallelDecoder, encoded_width, encoded_height, | |
| internal_format, DECODED_RESOLUTION_FULL, NULL); | |
| } | |
| #else | |
| decoder->parallelDecoder = (DECODER *)MEMORY_ALLOC(sizeof(DECODER)); | |
| if(decoder->parallelDecoder) | |
| { | |
| memset(decoder->parallelDecoder, 0, sizeof(DECODER)); | |
| decoder->parallelDecoder->thread_cntrl = decoder->thread_cntrl; | |
| DecodeInit(decoder->parallelDecoder, encoded_width, encoded_height, | |
| internal_format, DECODED_RESOLUTION_FULL, NULL); | |
| } | |
| #endif | |
| } | |
| // Using the parallel decoder? | |
| if (decoder->parallelDecoder) | |
| { | |
| // Initialize the parallel decoder with parameters from the regular decoder | |
| memcpy(&decoder->parallelDecoder->cfhddata, &decoder->cfhddata, sizeof(CFHDDATA)); | |
| memcpy(decoder->parallelDecoder->licensekey,decoder->licensekey, 16); | |
| DecodeEntropyInit(decoder->parallelDecoder); | |
| DecodeOverrides(decoder->parallelDecoder, decoder->overrideData, decoder->overrideSize); | |
| decoder->parallelDecoder->channel_decodes = decoder->channel_decodes; | |
| decoder->parallelDecoder->channel_blend_type = decoder->channel_blend_type; | |
| decoder->parallelDecoder->flags = decoder->flags; | |
| decoder->parallelDecoder->frame = decoder->frame; | |
| decoder->parallelDecoder->use_local_buffer = use_local_buffer ? 1 : 0; | |
| decoder->parallelDecoder->codec.encoded_format = decoder->codec.encoded_format; | |
| if(decoder->parallelDecoder->decoder_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->parallelDecoder->decoder_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->parallelDecoder->decoder_thread.pool, | |
| 1, // | |
| ParallelThreadProc, | |
| decoder->parallelDecoder); | |
| } | |
| } | |
| if(channel_decodes == 2 && decoder->real_channels > 1 && decoder->parallelDecoder && decoder->parallelDecoder->decoder_thread.pool.thread_count) | |
| { | |
| // Second stream as a thread. | |
| BITSTREAM second_input = *input; | |
| if(decoder->cfhddata.FramingFlags & 2 && decoder->source_channels >= 2) // channel swap | |
| { | |
| BITSTREAM leftEye_input = *input; | |
| SkipVideoChannel(decoder, &leftEye_input, 2); // 3D work | |
| *input = leftEye_input; | |
| SkipVideoChannel(decoder, &second_input, 1); // 3D work | |
| } | |
| else | |
| SkipVideoChannel(decoder, &second_input, 2); // 3D work | |
| decoder->channel_current = 0; | |
| decoder->parallelDecoder->channel_current = 1; | |
| // Instead of reading the metadata databases again, use the ones in the main decoder | |
| OverrideCFHDDATAUsingParent(decoder->parallelDecoder, decoder, input->lpCurrentBuffer, input->nWordsUsed); | |
| // DAN20110404 Use left (first) eye metadata for both eyes (just in case right GUID is bad.) | |
| // OverrideCFHDDATA(decoder->parallelDecoder, input->lpCurrentBuffer, input->nWordsUsed); | |
| //OverrideCFHDDATA(decoder->parallelDecoder, second_input.lpCurrentWord, second_input.nWordsUsed); | |
| // Hack, this gets lost | |
| decoder->parallelDecoder->cfhddata.split_CC_position = decoder->cfhddata.split_CC_position; | |
| #if (_THREADED && _GRAPHICS) | |
| if(decoder->cfhddata.process_path_flags & PROCESSING_BURNINS && output) | |
| { | |
| if(decoder->cfhddata.BurninFlags & 3) // overlays / tools | |
| { | |
| DrawStartThreaded(decoder); | |
| } | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| decoder->parallelDecoder->decoder_thread.input = &second_input; | |
| if(use_local_buffer == false && | |
| (decoder->frame.format == DECODED_FORMAT_RGB32 || decoder->frame.format == DECODED_FORMAT_RGB24)) | |
| { | |
| decoder->parallelDecoder->decoder_thread.output = local_output; | |
| local_output += channel_offset; | |
| } | |
| else | |
| { | |
| decoder->parallelDecoder->decoder_thread.output = local_output + channel_offset; | |
| } | |
| decoder->parallelDecoder->decoder_thread.pitch = local_pitch; | |
| decoder->parallelDecoder->decoder_thread.colorparams = colorparams; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->parallelDecoder->decoder_thread.pool, 1); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->parallelDecoder->decoder_thread.pool, THREAD_MESSAGE_START); | |
| // do the first channel | |
| { | |
| TAGVALUE segment; | |
| int sample_type; | |
| #if _THREADED | |
| decoder->entropy_worker_new.next_queue_num = 0; | |
| decoder->entropy_worker_new.threads_used = 0; | |
| #endif | |
| // Get the type of sample | |
| segment = GetTagValue(input); | |
| assert(segment.tuple.tag == CODEC_TAG_SAMPLE); | |
| if (!IsValidSegment(input, segment, CODEC_TAG_SAMPLE)) { | |
| decoder->error = CODEC_ERROR_BITSTREAM; | |
| STOP(tk_decompress); | |
| return false; | |
| } | |
| sample_type = segment.tuple.value; | |
| switch (sample_type) | |
| { | |
| case SAMPLE_TYPE_GROUP: // Group of frames (decode the first frame) | |
| result = DecodeSampleGroup(decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_FRAME: // Decode the second or later frame in a group | |
| result = DecodeSampleFrame(decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_IFRAME: // Decode a sample that represents an isolated frame | |
| result = DecodeSampleIntraFrame(decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_SEQUENCE_HEADER: | |
| // The video sequence header is ignored | |
| result = true; | |
| break; | |
| default: | |
| // Need to fill the output frame | |
| //error = CODEC_ERROR_SAMPLE_TYPE; | |
| result = false; | |
| } | |
| } | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->parallelDecoder->decoder_thread.pool); | |
| } | |
| else | |
| { | |
| while(channel_decodes > 0) | |
| { | |
| TAGVALUE segment; | |
| int sample_type; | |
| local_decoder->channel_current = channel_current++; | |
| //OverrideCFHDDATA(local_decoder, input->lpCurrentBuffer, input->nWordsUsed); | |
| #if (_THREADED && _GRAPHICS) | |
| if(decoder->cfhddata.process_path_flags & PROCESSING_BURNINS && output) | |
| { | |
| if(decoder->cfhddata.BurninFlags & 3) //overlays / tools | |
| { | |
| DrawStartThreaded(decoder); | |
| } | |
| } | |
| #endif | |
| #if _THREADED | |
| local_decoder->entropy_worker_new.next_queue_num = 0; | |
| local_decoder->entropy_worker_new.threads_used = 0; | |
| #endif | |
| if(decoder->image_dev_only) | |
| { | |
| result = DecodeSampleIntraFrame(local_decoder, input, local_output, local_pitch, colorparams); | |
| } | |
| else | |
| { | |
| // Get the type of sample | |
| segment = GetTagValue(input); | |
| assert(segment.tuple.tag == CODEC_TAG_SAMPLE); | |
| if (!IsValidSegment(input, segment, CODEC_TAG_SAMPLE)) { | |
| local_decoder->error = CODEC_ERROR_BITSTREAM; | |
| STOP(tk_decompress); | |
| return false; | |
| } | |
| sample_type = segment.tuple.value; | |
| switch (sample_type) | |
| { | |
| case SAMPLE_TYPE_GROUP: // Group of frames (decode the first frame) | |
| result = DecodeSampleGroup(local_decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_FRAME: // Decode the second or later frame in a group | |
| result = DecodeSampleFrame(local_decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_IFRAME: // Decode a sample that represents an isolated frame | |
| result = DecodeSampleIntraFrame(local_decoder, input, local_output, local_pitch, colorparams); | |
| break; | |
| case SAMPLE_TYPE_SEQUENCE_HEADER: | |
| // The video sequence header is ignored | |
| result = true; | |
| break; | |
| default: | |
| // Need to fill the output frame | |
| //error = CODEC_ERROR_SAMPLE_TYPE; | |
| result = false; | |
| } | |
| } | |
| if(ConvertPreformatted3D(decoder, use_local_buffer, internal_format, channel_mask, local_output, local_pitch, &channel_offset)) | |
| { | |
| channel_decodes = 0; | |
| } | |
| else | |
| { | |
| channel_decodes--; | |
| local_output += channel_offset; | |
| if(decoder->parallelDecoder) | |
| { | |
| local_decoder = decoder->parallelDecoder; | |
| } | |
| } | |
| } | |
| } | |
| if(use_local_buffer && output) | |
| { | |
| decoder->use_local_buffer = 0; | |
| #if WARPSTUFF | |
| WarpFrame(decoder, local_buffer, local_pitch, decoder->StereoBufferFormat); | |
| MaskFrame(decoder, local_buffer, local_pitch, decoder->StereoBufferFormat); | |
| #endif | |
| ConvertLocalToOutput(decoder, output, pitch, output_format, local_buffer, local_pitch, abs(channel_offset)); | |
| } | |
| else | |
| { | |
| #if WARPSTUFF | |
| WarpFrame(decoder, output, pitch, output_format); | |
| MaskFrame(decoder, output, pitch, output_format); | |
| #endif | |
| } | |
| if(decoder->channel_mix_half_res) //HACK | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_FULL; | |
| decoder->frame.width *= 2; | |
| decoder->frame.height *= 2; | |
| decoder->channel_mix_half_res = 0; | |
| } | |
| if( decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) //HACK | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_FULL; | |
| decoder->frame.width *= 2; | |
| } | |
| if( decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) //HACK | |
| { | |
| decoder->frame.resolution = DECODED_RESOLUTION_FULL; | |
| } | |
| #if _GRAPHICS | |
| if(decoder->cfhddata.process_path_flags & PROCESSING_BURNINS && output) | |
| { | |
| PaintFrame(decoder, output, pitch, output_format); | |
| } | |
| #endif | |
| STOP(tk_decompress); | |
| // Return indication of whether decoding succeeded or failed | |
| return result; | |
| } | |
// Decode a sample that encoded a group of frames (return the first frame)
/*!
	@brief Decode a sample that encodes a group of frames and output the first frame.

	Initializes the codec state and the wavelet decoding tables, then consumes
	tag-value pairs from the bitstream until the sample is done.  When the rest
	of a channel is not needed for the requested resolution, the bitstream is
	seeked past that channel.  On success the first frame of the group is
	reconstructed into the output buffer (unless decoder->no_output is set);
	on any failure the output frame is cleared to zero.

	@param decoder     Decoder state (transforms, codec state, error code)
	@param input       Bitstream positioned at the start of the group sample
	@param output      Destination buffer for the first decoded frame
	@param pitch       Pitch of the output buffer in bytes
	@param colorparams Color parameters (not referenced in this routine body)

	@return true if the sample was decoded successfully, false otherwise;
	        decoder->error holds the specific codec error code on failure.
*/
bool DecodeSampleGroup(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	CODEC_STATE *codec = &decoder->codec;

	// Byte size of the output frame (used only to clear the frame on failure)
	int32_t frame_size = decoder->frame.height * pitch;
	int resolution = decoder->frame.resolution;
	bool result = true;

	// Map each subband (in bitstream order) to the wavelet that contains it
	// and to the band within that wavelet; passed to InitWaveletDecoding below
	static int subband_wavelet_index[] = {5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 1, 1, 1, 0, 0, 0};
	static int subband_band_index[] = {0, 1, 2, 3, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3};
	int num_subbands = sizeof(subband_wavelet_index)/sizeof(subband_wavelet_index[0]);

#if (0 && DEBUG)
	// Force quarter resolution decoding for debug that feature
	resolution = DECODED_RESOLUTION_QUARTER;
#endif

#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "Decoding sample group\n");
	}
#endif

	START(tk_decoding);

	// Initialize the codec state
	InitCodecState(&decoder->codec);

	// Allocate the transform data structure for the group of frames
	AllocDecoderGroup(decoder);

	// Initialize the tables for decoding the wavelet transforms
	InitWaveletDecoding(decoder, subband_wavelet_index, subband_band_index, num_subbands);

	// Clear the flags in the wavelet transforms
	ClearTransformFlags(decoder);

	// Process the tag value pairs until an encoded subband is found
	for (;;)
	{
		TAGVALUE segment;

		// Read the next tag value pair from the bitstream
		//segment = GetTagValue(input);
		segment = GetSegment(input);
		assert(input->error == BITSTREAM_ERROR_OKAY);
		if (input->error != BITSTREAM_ERROR_OKAY) {
			decoder->error = CODEC_ERROR_BITSTREAM;
			result = false;
			break;
		}

		// Update the codec state with the information in the tag value pair
		{
			TAGWORD tag = segment.tuple.tag;
			TAGWORD value = segment.tuple.value;

			// Use the tag value pair to update the codec state
			error = UpdateCodecState(decoder, input, codec, tag, value);
			assert(error == CODEC_ERROR_OKAY);
			if (error != CODEC_ERROR_OKAY)
			{
				decoder->error = error;
				result = false;
				break;
				//NOTE: Consider moving the error code into the codec state
			}
		}

		// Check whether the group has been decoded
		if (codec->sample_done) break;

		// Skip the rest of the current channel?
		if (CanSkipChannel(decoder, resolution))
		{
			// A fourth channel is present but the output format is three channel
			// 4:2:2 YUV, so the remaining data is not needed: clamp the channel
			// count to three and finish decoding the sample
			if(codec->channel == 3 && (decoder->frame.format == DECODED_FORMAT_YUYV || decoder->frame.format == DECODED_FORMAT_UYVY))
			{
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Advance the bitstream to the next channel
				SetBitstreamPosition(input, position);

				// Reset the decoded subband flags (otherwise this code will be executed again)
				codec->decoded_subband_flags = 0;
				codec->num_channels = 3;
				goto decoding_complete;
			}
			else
			// Only the lowpass band is wanted: skip the rest of this channel
			// without reconstructing any wavelets
			if (resolution == DECODED_RESOLUTION_LOWPASS_ONLY)
			{
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Advance the bitstream to the next channel
				SetBitstreamPosition(input, position);

				// Reset the decoded subband flags (otherwise this code will be executed again)
				codec->decoded_subband_flags = 0;
			}
			else
			{
				// Compute the bitstream position after the current channel
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Get the temporal wavelet
				int temporal_index = 2;
				TRANSFORM *transform = decoder->transform[channel];
				IMAGE *wavelet = transform->wavelet[temporal_index];

#if (0 && DEBUG)
				if (IsBandValid(wavelet, HIGHPASS_BAND))
				{
					int static count = 0;
					if (count < 20) {
						char label[_MAX_PATH];
						sprintf(label, "Temporal-decode-%d-", count);
						DumpBandPGM(label, wavelet, HIGHPASS_BAND, NULL);
					}
					count++;
				}
#endif

#if _THREADED_DECODER
				// Ready to invert this wavelet to get the lowpass band in the lower wavelet?
				//if (DecodedBandsValid(wavelet, temporal_index))
				if (resolution != DECODED_RESOLUTION_QUARTER || (decoder->codec.encoded_format == ENCODED_FORMAT_BAYER))
#else
				// Have all bands in the temporal wavelet been decoded?
				//if (wavelet && BANDS_ALL_VALID(wavelet))
				if (AllBandsValid(wavelet))
#endif
				{
					//PIXEL *buffer = (PIXEL *)decoder->buffer;
					//size_t buffer_size = decoder->buffer_size;
					int precision = codec->precision;

#if (0 && DEBUG)
					if (logfile) {
						fprintf(logfile, "Reconstructing the lowpass bands in the first level wavelets\n");
					}
#endif

#if _THREADED_DECODER
					// Add the temporal inverse transform to the processing queue
					if(decoder->entropy_worker_new.pool.thread_count)
					{
						ReconstructWaveletBand(decoder, transform, channel, wavelet, temporal_index,
											   precision, &decoder->scratch, 1);
						QueueThreadedTransform(decoder, channel, temporal_index);
					}
					else
#endif
					{
						// Reconstruct the lowpass bands in the first level wavelets
						//ReconstructWaveletBand(transform, channel, wavelet, temporal_index, precision, buffer, buffer_size);
						ReconstructWaveletBand(decoder, transform, channel, wavelet, temporal_index,
											   precision, &decoder->scratch, 0 );
					}

					// Advance the bitstream to the next channel
					SetBitstreamPosition(input, position);

					// Reset the decoded subband flags (otherwise this code will be executed again)
					codec->decoded_subband_flags = 0;

					// Note that the subband flags are also reset when the channel header is decoded
				}
				// Was the wavelet created?
				else if (wavelet == NULL)
				{
					// The temporal wavelet is not created during quarter resolution decoding

					// Advance the bitstream to the next channel
					SetBitstreamPosition(input, position);

					// Reset the decoded subband flags (otherwise this code will be executed again)
					codec->decoded_subband_flags = 0;
				}

				//TODO: Improve quarter resolution decoding so that the wavelet is created?
			}
		}
	}

decoding_complete:
	STOP(tk_decoding);

#if (0 && DEBUG)
	if (logfile)
	{
		char label[_MAX_PATH];
		int channel;
		for (channel = 0; channel < codec->num_channels; channel++)
		{
			TRANSFORM *transform = decoder->transform[channel];
			IMAGE *wavelet = transform->wavelet[2];
			uint8_t *data = (uint8_t *)wavelet->band[HIGHPASS_BAND];
			int height = wavelet->height;
			int pitch = wavelet->pitch;
			int size = height * pitch;
			int band;

			for (band = 0; band < wavelet->num_bands; band++)
			{
				sprintf(label, "Temporal channel: %d, band: %d", channel, band);
				DumpBandStatistics(label, wavelet, band, logfile);
#if 0
				sprintf(label, "Temporal-channel%d-band%d-", channel, band);
				DumpBandPGM(label, wavelet, band, NULL);
#endif
			}

			assert(size > 0);
			ZeroMemory(data, size);
		}
	}
#endif

	if (result)
	{
		// Two frames have been decoded
		decoder->gop_length = 2;
		decoder->frame_count += 2;

#if (1 && DEBUG)
		if (logfile) {
			fprintf(logfile,
					"DecodeSampleGroup, decoder: 0x%p, GOP length: %d\n",
					decoder, decoder->gop_length);
		}
#endif

		// Return the first frame in the group
		if (!decoder->no_output)
		{
#if 0
			// Decoding to quarter frame resolution at full frame rate?
			if (resolution == DECODED_RESOLUTION_QUARTER)
			{
				int num_channels = codec->num_channels;
				FRAME_INFO *info = &decoder->frame;
				char *buffer = decoder->buffer;
				size_t buffer_size = decoder->buffer_size;
				uint8_t *frame1 = output;
				uint8_t *frame2 = decoder->output2;
				assert(frame2 != NULL);

				// Reconstruct two frames at quarter resolution
				ReconstructQuarterFrame(decoder, num_channels,
										frame1, frame2, pitch,
										info, buffer, buffer_size);
			}
			else
#endif
			// Finish computing the output frame
			ReconstructSampleFrameToBuffer(decoder, 0, output, pitch);
		}

		if (decoder->error != CODEC_ERROR_OKAY) {
			result = false;
		}

#if TIMING
		// Increment the count of bytes that have been decoded
		decode_byte_count += (COUNTER)BitstreamByteCount(input);
#endif
	}

	if (!result)
	{
		// Check that the frame can be cleared
		assert(frame_size > 0);
		if (frame_size > 0)
		{
			// Zero the frame
			memset(output, 0, frame_size);
		}
	}

	return result;
}
| // Decode a sample that represents the second frame in a group | |
| bool DecodeSampleFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| int32_t frame_size = decoder->frame.height * pitch; | |
| bool result = true; | |
| START(tk_decoding); | |
| // Decode the tag value pairs in the frame sample | |
| for (;;) | |
| { | |
| TAGWORD tag; | |
| TAGWORD value; | |
| // Read the next tag value pair from the bitstream | |
| //TAGVALUE segment = GetTagValue(input); | |
| TAGVALUE segment = GetSegment(input); | |
| assert(input->error == BITSTREAM_ERROR_OKAY); | |
| if (input->error != BITSTREAM_ERROR_OKAY) { | |
| decoder->error = CODEC_ERROR_BITSTREAM; | |
| result = false; | |
| break; | |
| } | |
| // Update the codec state with the information in the tag value pair | |
| tag = segment.tuple.tag; | |
| value = segment.tuple.value; | |
| // Use the tag value pair to update the codec state | |
| error = UpdateCodecState(decoder, input, codec, tag, value); | |
| assert(error == CODEC_ERROR_OKAY); | |
| if (error != CODEC_ERROR_OKAY) { | |
| decoder->error = error; | |
| result = false; | |
| break; | |
| } | |
| // End of the frame header? | |
| if (tag == CODEC_TAG_FRAME_INDEX) break; | |
| } | |
| STOP(tk_decoding); | |
| #if (1 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, | |
| "DecodeSampleFrame, decoder: 0x%p, GOP length: %d\n", | |
| decoder, decoder->gop_length); | |
| } | |
| #endif | |
| if (result) | |
| { | |
| // Return the second frame in the group | |
| // assert(decoder->gop_length >= 2); | |
| if (decoder->gop_length >= 2) | |
| { | |
| int frame_index = 1; // Display the second frame in the group | |
| ReconstructSampleFrameToBuffer(decoder, frame_index, output, pitch); | |
| if (decoder->error != CODEC_ERROR_OKAY) { | |
| result = false; | |
| } | |
| } | |
| else if (decoder->gop_length > 0) | |
| { | |
| int frame_index = 0; // Display the first frame in the group | |
| ReconstructSampleFrameToBuffer(decoder, frame_index, output, pitch); | |
| if (decoder->error != CODEC_ERROR_OKAY) { | |
| result = false; | |
| } | |
| } | |
| #if TIMING | |
| // Increment the count of bytes that have been decoded | |
| decode_byte_count += (COUNTER)BitstreamByteCount(input); | |
| #endif | |
| } | |
| if (!result) | |
| { | |
| // Frame type that is not handled | |
| // Check that the frame can be cleared | |
| assert(frame_size > 0); | |
| if (frame_size > 0) | |
| { | |
| // Zero the frame | |
| memset(output, 0, frame_size); | |
| } | |
| } | |
| return result; | |
| } | |
// Decode a sample that encodes an intra frame
/*!
	@brief Decode a sample that encodes a single intra frame and output it.

	Initializes the codec state and the wavelet decoding tables for an intra
	(spatial) transform, then consumes tag-value pairs from the bitstream until
	the sample is done, skipping channels that are not needed at the requested
	resolution.  On success the decoded frame is written to the output buffer
	(via the dedicated quarter-resolution path when applicable); on failure the
	output frame is cleared to zero.

	@param decoder     Decoder state (transforms, codec state, error code)
	@param input       Bitstream positioned at the start of the intra sample
	@param output      Destination buffer for the decoded frame
	@param pitch       Pitch of the output buffer in bytes
	@param colorparams Color parameters (not referenced in this routine body)

	@return true on success, false otherwise; decoder->error holds the
	        specific codec error code on failure.
*/
bool DecodeSampleIntraFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	CODEC_STATE *codec = &decoder->codec;

	// Byte size of the output frame (used only to clear the frame on failure)
	int32_t frame_size = decoder->frame.height * pitch;
	int resolution = decoder->frame.resolution;
	bool result = true;

	// Map each subband (in bitstream order) to the wavelet that contains it
	// and to the band within that wavelet; passed to InitWaveletDecoding below.
	// NOTE(review): subband_band_index has 16 entries while the wavelet table
	// has 10; only the first num_subbands entries (taken from the wavelet
	// table) are used, so the extra entries are dead data — confirm intent.
	static int subband_wavelet_index[] = {2, 2, 2, 2, 1, 1, 1, 0, 0, 0};
	static int subband_band_index[] = {0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3};
	int num_subbands = sizeof(subband_wavelet_index)/sizeof(subband_wavelet_index[0]);

	START(tk_decoding);

	// Skip all sample decoding when only image development is requested
	if(decoder->image_dev_only) goto decoding_completeI;

	// Initialize the codec state
	InitCodecState(&decoder->codec);

	// Allocate the transform data structure for the group of frames
	AllocDecoderGroup(decoder);

	// Initialize the tables for decoding the wavelet transforms
	InitWaveletDecoding(decoder, subband_wavelet_index, subband_band_index, num_subbands);

	// Clear the flags in the wavelet transforms
	ClearTransformFlags(decoder);

	//Force V210 output for debugging ***DEBUG***
	//decoder->frame.format = DECODED_FORMAT_V210;

	// Process the tag value pairs until an encoded subband is found
	for (;;)
	{
		TAGVALUE segment;

		// Read the next tag value pair from the bitstream
		segment = GetSegment(input);
		assert(input->error == BITSTREAM_ERROR_OKAY);
		if (input->error != BITSTREAM_ERROR_OKAY) {
			decoder->error = CODEC_ERROR_BITSTREAM;
			result = false;
			break;
		}

		// Update the codec state with the information in the tag value pair
		{
			TAGWORD tag = segment.tuple.tag;
			TAGWORD value = segment.tuple.value;

			// Use the tag value pair to update the codec state
			error = UpdateCodecState(decoder, input, codec, tag, value);
			assert(error == CODEC_ERROR_OKAY);
			if (error != CODEC_ERROR_OKAY) {
				decoder->error = error;
				result = false;
				break;
				//NOTE: Consider moving the error code into the codec state
			}
		}

		// Check whether the group has been decoded
		if (codec->sample_done) {
			break;
		}

		// Skip the rest of the current channel?
		if (CanSkipChannel(decoder, resolution))
		{
			// A fourth channel is present but the output format is three channel
			// 4:2:2 YUV, so the remaining data is not needed: clamp the channel
			// count to three and finish decoding the sample
			if(codec->channel == 3 && (decoder->frame.format == DECODED_FORMAT_YUYV || decoder->frame.format == DECODED_FORMAT_UYVY))
			{
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Advance the bitstream to the next channel
				SetBitstreamPosition(input, position);

				// Reset the decoded subband flags (otherwise this code will be executed again)
				codec->decoded_subband_flags = 0;
				codec->num_channels = 3;
				goto decoding_completeI;
			}
			// Only the lowpass band is wanted: skip the rest of this channel
			// without reconstructing any wavelets
			else if (resolution == DECODED_RESOLUTION_LOWPASS_ONLY)
			{
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Advance the bitstream to the next channel
				SetBitstreamPosition(input, position);

				// Reset the decoded subband flags (otherwise this code will be executed again)
				codec->decoded_subband_flags = 0;
			}
			else
			{
				// Compute the bitstream position after the current channel
				int channel = codec->channel;
				uint32_t channel_size = codec->channel_size[channel];
				uint8_t *position = codec->channel_position + channel_size;

				// Get the highest wavelet in the pyramid
				int wavelet_index = 2;
				TRANSFORM *transform = decoder->transform[channel];
				IMAGE *wavelet = transform->wavelet[wavelet_index];

#if _THREADED_DECODER
				// Ready to invert this wavelet to get the lowpass band in the lower wavelet?
				//if (DecodedBandsValid(wavelet, temporal_index))
				if (resolution != DECODED_RESOLUTION_QUARTER || (decoder->codec.encoded_format == ENCODED_FORMAT_BAYER))
#else
				// Have all bands in the wavelet been decoded?
				if (AllBandsValid(wavelet))
#endif
				{
					//PIXEL *buffer = (PIXEL *)decoder->buffer;
					//size_t buffer_size = decoder->buffer_size;
					int precision = codec->precision;

#if (0 && DEBUG)
					if (logfile) {
						char label[_MAX_PATH];
						int band;
						sprintf(label, "Channel: %d, index: %d", channel, wavelet_index);
						DumpImageStatistics(label, wavelet, logfile);
#if 1
						for (band = 1; band < wavelet->num_bands; band++)
						{
							sprintf(label, "Channel: %d, index: %d, band: %d", channel, wavelet_index, band);
							DumpBandStatistics(label, wavelet, band, logfile);
						}
#endif
					}
#endif

#if (0 & DEBUG)
					if (logfile) {
						fprintf(logfile, "Reconstructing the lowpass bands in the first level wavelets\n");
					}
#endif

#if _THREADED_DECODER
					// Add the inverse spatial transform to the processing queue
					if(decoder->entropy_worker_new.pool.thread_count)
					{
						ReconstructWaveletBand(decoder, transform, channel, wavelet, wavelet_index,
											   precision, &decoder->scratch, 1);
						QueueThreadedTransform(decoder, channel, wavelet_index);
					}
					else
#endif
					{
						// Reconstruct the lowpass bands in the first level wavelets
						//ReconstructWaveletBand(transform, channel, wavelet, temporal_index, precision, buffer, buffer_size);
						ReconstructWaveletBand(decoder, transform, channel, wavelet, wavelet_index,
											   precision, &decoder->scratch, 0);
					}

					// Advance the bitstream to the next channel
					SetBitstreamPosition(input, position);

					// Reset the decoded subband flags (otherwise this code will be executed again)
					codec->decoded_subband_flags = 0;

					// Note that the subband flags are also reset when the channel header is decoded
				}
				// Was the wavelet created?
				//else if (wavelet == NULL)
				else
				{
					// The wavelet may not have been created during quarter resolution decoding

					// The wavelet should have been created if all bands are valid
					assert(wavelet != NULL);

					// Advance the bitstream to the next channel
					SetBitstreamPosition(input, position);

					// Reset the decoded subband flags (otherwise this code will be executed again)
					codec->decoded_subband_flags = 0;
				}

				//TODO: Improve quarter resolution decoding so that the wavelet is created?
			}
		}
	}

decoding_completeI:
	STOP(tk_decoding);

	if (result)
	{
		// One frame has been decoded
		decoder->gop_length = 1;
		decoder->frame_count += 1;

#if (0 && DEBUG)
		if (logfile) {
			fprintf(logfile,
					"DecodeSampleIntraFrame, decoder: 0x%p, GOP length: %d\n",
					decoder, decoder->gop_length);
		}
#endif

		// Return the first frame (the only frame that was decoded)
		if (!decoder->no_output)
		{
			// True when the sample carries an uncompressed chunk (bypasses the
			// wavelet reconstruction paths below)
			int uncompressed = decoder->uncompressed_chunk && decoder->uncompressed_size && decoder->sample_uncompressed;

			// Quarter-resolution output (non-Bayer) uses a dedicated conversion path
			if ( !uncompressed && resolution == DECODED_RESOLUTION_QUARTER && (decoder->codec.encoded_format != ENCODED_FORMAT_BAYER))
			{
				//CODEC_STATE *codec = &decoder->codec;
				TRANSFORM **transform_array = decoder->transform;
				int num_channels = codec->num_channels;
				//int progressive = codec->progressive;
				FRAME_INFO *info = &decoder->frame;
				int precision = codec->precision;

#if _THREADED_DECODER
				// Wait until the transform thread has finished all pending transforms
				WaitForTransformThread(decoder);
#endif
				ConvertQuarterFrameToBuffer(decoder, transform_array, num_channels, output, pitch, info, precision);
			}
			else
			{
				// Finish computing the output frame
				ReconstructSampleFrameToBuffer(decoder, 0, output, pitch);
			}
		}

		if (decoder->error != CODEC_ERROR_OKAY) {
			result = false;
		}

#if TIMING
		// Increment the count of bytes that have been decoded
		decode_byte_count += (COUNTER)BitstreamByteCount(input);
#endif
	}

	if (!result)
	{
		// Check that the frame can be cleared
		assert(frame_size > 0);
		if (frame_size > 0)
		{
			// Zero the frame
			memset(output, 0, frame_size);
		}
	}

	return result;
}
| // Decode a sample channel header | |
| bool DecodeSampleChannelHeader(DECODER *decoder, BITSTREAM *input) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| CODEC_STATE *codec = &decoder->codec; | |
| int channel = codec->channel; | |
| CHANNEL_HEADER header; | |
| TRANSFORM *transform = decoder->transform[channel]; | |
| TRANSFORM *next_transform; | |
| // Advance to the next channel | |
| channel++; | |
| // Get the next transform for decoded information | |
| //TRANSFORM *next_transform = AllocGroupTransform(group, channel); | |
| // Decode the rest of the channel header | |
| error = DecodeChannelHeader(input, &header, SAMPLE_TYPE_CHANNEL); | |
| assert(error == CODEC_ERROR_OKAY); | |
| decoder->error = error; | |
| if (error != CODEC_ERROR_OKAY) return false; | |
| // The decoder is not able to skip channels | |
| assert(header.channel == channel); | |
| // Initialize the next transform using the previous one | |
| next_transform = decoder->transform[channel]; | |
| InitChannelTransform(next_transform, transform); | |
| // Update the channel | |
| codec->channel = channel; | |
| // Reset the subband counter | |
| codec->band.subband = 0; | |
| // Reset the decoded subband flags | |
| codec->decoded_subband_flags = 0; | |
| // Loop back to decode the next channel | |
| //transform = next_transform; | |
| return true; | |
| } | |
// Decode the coefficients in a subband
/*!
	@brief Decode one subband from the bitstream into its wavelet band.

	The subband number selects one of three paths: 255 decodes an empty
	(temporal highpass) band, a positive number decodes a highpass band, and
	zero decodes the lowpass band.  In each case the target wavelet is
	(re)allocated from the dimensions recorded in the codec state before the
	band is decoded.  Once every band of the containing wavelet has been
	decoded (or started, in the threaded decoder), the inverse wavelet
	transform is applied — or queued on the worker pool — to reconstruct the
	lowpass band of the next lower wavelet.

	@param decoder Decoder state (transforms, codec state, scratch buffers)
	@param input   Bitstream positioned at the subband coefficients
	@param subband Subband number from the bitstream (0 = lowpass,
	               255 = empty temporal highpass, otherwise highpass)

	@return true if the subband was decoded successfully, false otherwise.
*/
bool DecodeSampleSubband(DECODER *decoder, BITSTREAM *input, int subband)
{
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	CODEC_STATE *codec = &decoder->codec;
	int channel = codec->channel;
	TRANSFORM *transform = decoder->transform[channel];

	// Maps a subband number to the wavelet that contains it (set up by
	// InitWaveletDecoding when sample decoding began)
	int *subband_wavelet_index = decoder->subband_wavelet_index;

	// Used for quarter resolution and threaded decoding
	int transform_type = transform->type;

	// Wavelet parameters
	int width;
	int height;
	int level;
	int type;
	int band;
	int threading = 1;

	// Wavelet containing the band to decode
	int index;
	IMAGE *wavelet = NULL;

	bool result;

	// Subbands 7..10 of a fieldplus transform are decoded without threading
	if(subband >= 7 && subband <= 10 && transform_type == TRANSFORM_TYPE_FIELDPLUS)
		threading = 0;

	// Update the transform data structure from the codec state
	UpdateCodecTransform(transform, codec);

	// Is this an empty band?
	if (subband == 255)
	{
		// Decode an empty band

		// This wavelet is the temporal wavelet
		index = 2;
		wavelet = transform->wavelet[index];

		// Get the wavelet parameters decoded from the bitstream
		width = codec->band.width;
		height = codec->band.height;
		level = codec->highpass.wavelet_level;
		type = codec->highpass.wavelet_type;
		band = codec->band.number;

		// The empty band should be the highpass band in a temporal wavelet
		assert(type == WAVELET_TYPE_TEMPORAL && band == 1);

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		wavelet = GetWaveletThreadSafe(decoder, transform, index, width, height, level, type);
#else
		// Allocate (or reallocate) the wavelet
#if _ALLOCATOR
		wavelet = ReallocWaveletEx(decoder->allocator, wavelet, width, height, level, type);
#else
		wavelet = ReallocWaveletEx(wavelet, width, height, level, type);
#endif
		// Save this wavelet in the transform data structure
		transform->wavelet[index] = wavelet;
#endif

		// Set the wavelet parameters
		// NOTE(review): wavelet is dereferenced without a NULL check here —
		// assumes the (re)allocation above cannot fail; confirm
		wavelet->pixel_type[band] = PIXEL_TYPE_16S;
		wavelet->num_bands = 2;

		result = DecodeSampleEmptyBand(decoder, input, wavelet, band);

		// Set the subband number for the next band expected in the bitstream
		codec->band.subband = 11;
	}
	// Is this a highpass band?
	else if (subband > 0)
	{
		// Decode a highpass band

		// Get the wavelet that contains this subband
		index = subband_wavelet_index[subband];
		wavelet = transform->wavelet[index];

		// Get the wavelet parameters decoded from the bitstream
		width = codec->band.width;
		height = codec->band.height;
		level = codec->highpass.wavelet_level;
		type = codec->highpass.wavelet_type;
		band = codec->band.number;

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		wavelet = GetWaveletThreadSafe(decoder, transform, index, width, height, level, type);
#else
		// Allocate (or reallocate) the wavelet
#if _ALLOCATOR
		wavelet = ReallocWaveletEx(decoder->allocator, wavelet, width, height, level, type);
#else
		wavelet = ReallocWaveletEx(wavelet, width, height, level, type);
#endif
		// Save this wavelet in the transform data structure
		transform->wavelet[index] = wavelet;
#endif

		result = DecodeSampleHighPassBand(decoder, input, wavelet, band, threading);
		if (result)
		{
			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandStartedFlags(decoder, wavelet, band);
		}

		// Reset the default encoding method
		codec->band.encoding = BAND_ENCODING_RUNLENGTHS;

		// Set the subband number for the next band expected in the bitstream
		codec->band.subband = subband + 1;
	}
	else
	{
		// Decode a lowpass band

		// Get the wavelet that contains this subband
		index = subband_wavelet_index[0];
		wavelet = transform->wavelet[index];

		// Get the wavelet parameters decoded from the bitstream
		width = codec->lowpass.width;
		height = codec->lowpass.height;
		level = codec->lowpass.level;
		type = codec->first_wavelet;
		//band = codec->band.number;
		band = 0;

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		wavelet = GetWaveletThreadSafe(decoder, transform, index, width, height, level, type);
#else
		// Allocate (or reallocate) the wavelet
#if _ALLOCATOR
		wavelet = ReallocWaveletEx(decoder->allocator, wavelet, width, height, level, type);
#else
		wavelet = ReallocWaveletEx(wavelet, width, height, level, type);
#endif
		// Save this wavelet in the transform data structure
		transform->wavelet[index] = wavelet;
#endif

		// The lowpass data is always stored in wavelet band zero
		assert(band == 0);

		// The lowpass band must be subband zero
		assert(subband == 0);

		result = DecodeSampleLowPassBand(decoder, input, wavelet);
		if (result)
		{
			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandValidFlags(decoder, wavelet, band);
		}

		// Set the subband number for the next band expected in the bitstream
		codec->band.subband = subband + 1;
	}

	// Was the subband successfully decoded?
	if (result)
	{
		// The transform will set the band valid flag if this is the temporal wavelet
		//if (index != 2)

		// Record that this subband has been decoded successfully
		if (0 <= subband && subband <= CODEC_MAX_SUBBAND)
			codec->decoded_subband_flags |= DECODED_SUBBAND_MASK(subband);

#if (0 && DEBUG)
		if (logfile) {
			fprintf(logfile, "Decoded subband: %d, wavelet: %d, channel: %d\n",
					subband, index, channel);
		}
#endif
	}

#if _THREADED_DECODER
	// Ready to queue a threaded transform to invert this wavelet?
	if (BANDS_ALL_STARTED(wavelet))
	{
		// Are frames being decoded to quarter resolution?
		if (decoder->frame.resolution == DECODED_RESOLUTION_QUARTER && (decoder->codec.encoded_format != ENCODED_FORMAT_BAYER))
		{
			// Smallest spatial wavelet above the lowpass temporal band (fieldplus transform)
			int highest_index = 5;

			if (transform_type == TRANSFORM_TYPE_SPATIAL)
			{
				// Smallest wavelet in the spatial transform
				highest_index = 2;
			}

			// Only the smallest spatial wavelet must be reconstructed
			if (index != highest_index) {
				return result;
			}

			//TODO: Can we improve on the current scheme for quarter resolution decoding?
		}

		if ((transform->type == TRANSFORM_TYPE_SPATIAL && index > 0) || index >= 2)
		{
			if(decoder->entropy_worker_new.pool.thread_count && threading)
			{
				ReconstructWaveletBand(decoder, transform, codec->channel, wavelet, index,
									   codec->precision, &decoder->scratch, 1);

				// Add the inverse wavelet transform to the processing queue
				QueueThreadedTransform(decoder, codec->channel, index);
			}
			else
			{
				// Apply the inverse wavelet transform to reconstruct the lower level wavelet
				ReconstructWaveletBand(decoder, transform, codec->channel, wavelet, index,
									   codec->precision, &decoder->scratch, 0);
			}
		}
	}
#else
	// Ready to invert this wavelet to get the lowpass band in the lower wavelet?
	if (BANDS_ALL_VALID(wavelet))
	{
		int channel = codec->channel;
		//PIXEL *buffer = (PIXEL *)decoder->buffer;
		//size_t buffer_size = decoder->buffer_size;
		int precision = codec->precision;

#if (0 && DEBUG)
		if (logfile) {
			char label[_MAX_PATH];
			int band;
			sprintf(label, "Channel: %d, index: %d", channel, index);
			DumpImageStatistics(label, wavelet, logfile);
#if 1
			for (band = 1; band < wavelet->num_bands; band++)
			{
				sprintf(label, "Channel: %d, index: %d, band: %d", channel, index, band);
				DumpBandStatistics(label, wavelet, band, logfile);
			}
#endif
		}
#endif

		// Are frames being decoded to quarter resolution?
		if (decoder->frame.resolution == DECODED_RESOLUTION_QUARTER && (decoder->codec.encoded_format != ENCODED_FORMAT_BAYER))
		{
			// Smallest spatial wavelet above the lowpass temporal band (fieldplus transform)
			int highest_index = 5;

			if (transform_type == TRANSFORM_TYPE_SPATIAL)
			{
				// Smallest wavelet in the spatial transform
				highest_index = 2;
			}

			// Only the smallest spatial wavelet must be reconstructed
			if (index != highest_index) {
				return result;
			}

			//TODO: Can we improve on the current scheme for quarter resolution decoding?
		}

		// Apply the inverse wavelet transform to reconstruct the lower level wavelet
		ReconstructWaveletBand(decoder, transform, channel, wavelet, index, precision, &decoder->scratch, 0);
	}
#endif

	return result;
}
// Decode the coefficients in a lowpass band
//
// Reads the lowpass (DC) band of the wavelet from the bitstream, applies the
// inverse quantization parameters carried in the codec state, and stores the
// result in band zero of the wavelet.  Three decode paths are used depending
// on how the band was packed by the encoder:
//   1. 16 bits per pixel on a word-aligned stream with an even width
//      (fast path; reads coefficient pairs as 32-bit words and supports an
//      optional solid-color shortcut header),
//   2. 8 bits per pixel on a word-aligned stream,
//   3. a generic GetBits() loop for anything else.
// Returns true; none of these paths clears the result flag.
bool DecodeSampleLowPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet)
{
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	CODEC_STATE *codec = &decoder->codec;
	int channel = codec->channel;		// Channel currently being decoded
	bool result = true;
	int lowpass_width;			// Lowpass band dimensions
	int lowpass_height;
	int lowpass_pitch;
	PIXEL *pLowPassRow;			// Pointer into the lowpass band
	//int wavelet_width;			// Dimensions of the wavelet image
	//int wavelet_height;
	int bits_per_pixel;			// Encoded size of each coefficient
	int quantization;			// Quantization factor applied by the encoder
	int offset;				// Pixel offset applied by the encoder
	//int pixel_divisor = (1 << (2 * codec->lowpass.level));
	int row, column;
	int32_t solid_color = -1;		// -1 means no solid-color shortcut was found
	const int gain = 128;			// Unity gain in 7-bit fixed point (see 8-bit path)
	const int colorshift = 0;
	// int channelgain[4];
	//int waterrow=19, watercol=214;
	//int cspace = decoder->frame.colorspace;

	// Lowpass image dimensions may be smaller than the wavelet dimensions
	// because the encoder may have transmitted an image without the border
	lowpass_width = codec->lowpass.width;
	lowpass_height = codec->lowpass.height;
	lowpass_pitch = wavelet->pitch/sizeof(PIXEL);
	pLowPassRow = wavelet->band[0];

	// Get the parameters for quantization performed by the encoder
	quantization = codec->lowpass.quantization;
	offset = codec->lowpass.pixel_offset;
	bits_per_pixel = codec->lowpass.bits_per_pixel;

#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "Decode lowpass subband\n");
	}
#endif

	// Fast path: aligned 16-bit coefficients with an even width, so pairs of
	// coefficients can be read directly from the buffer as 32-bit words
	if (bits_per_pixel == 16 && stream->nBitsFree == BITSTREAM_BUFFER_SIZE && !(lowpass_width&1))
	{
		int32_t *lpCurrentLong = (int32_t *)stream->lpCurrentWord;
		//int signval = 0;
		//int channel3stats = 0;
		int channeloffset = 0;

		// The channel offset compensates for rounding bias; the value depends
		// on the encoded precision, the output format, and the GOP length
		// (two frames per group versus one)
		if(decoder->codec.precision == 8)
		{
			channeloffset = (codec->num_frames==2 ? 64 : 32);
		}
		else if(decoder->codec.precision == 10)
		{
			switch(decoder->frame.format)
			{
			case DECODED_FORMAT_YU64:
			case DECODED_FORMAT_YR16:
			case DECODED_FORMAT_V210:
				channeloffset = codec->num_frames==2 ? 14 : 4;//DAN20090601, recal I-frame DAN20110301
				break;
			default:
				channeloffset = codec->num_frames==2 ? 48 : 24;//DAN20090601
			}

			if(decoder->sample_uncompressed) //DAN20110301 was testing the GOP length for this (why?)
				channeloffset = 0; //DAN20100822 -- Prevent offset between uncompressed V210 and compressed frames
		}
		else if(decoder->codec.precision == 12)
		{
			switch(decoder->frame.format)
			{
			case DECODED_FORMAT_RGB24:
			case DECODED_FORMAT_RGB24_INVERTED:
			case DECODED_FORMAT_RGB32:
			case DECODED_FORMAT_RGB32_INVERTED:
				channeloffset = 8; //DAN200906010
				break;

			// 16-bit precision:
			case DECODED_FORMAT_RG48:
			case DECODED_FORMAT_RG64:
			case DECODED_FORMAT_B64A:
			case DECODED_FORMAT_WP13:
			case DECODED_FORMAT_W13A:
				channeloffset = 0;
				break;

			case DECODED_FORMAT_RG30:
			case DECODED_FORMAT_R210:
			case DECODED_FORMAT_DPX0:
			case DECODED_FORMAT_AR10:
			case DECODED_FORMAT_AB10:
				channeloffset = 6; //DAN200906010 //DAN20100822 -- prefect for uncompressed to compressed.
				break;

			default:
				channeloffset = 0;
				break;
			}
		}

		if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) //DAN20090728 -- Prevent offset between uncompressed and compressed RAW frames
			channeloffset = 0;

#define DUMPLL 0
#if (_DEBUG && DUMPLL)
		// Debug aid: dump the decoded lowpass band of channel zero as a PGM image
		FILE *fp;
		if(channel == 0)
		{
			static int inc = 1;
			char name[256];
			sprintf(name,"C:\\Cedoc\\LLdec%03d.pgm", inc++);
			fp = fopen(name,"w");
			fprintf(fp, "P2\n# CREATOR: DAN\n%d %d\n255\n", lowpass_width, lowpass_height);
		}
#endif

#if LOSSLESS
		channeloffset = 0; //LOSSLESS
#endif

		// Check for the solid-color shortcut header: an all-ones marker word
		// followed by the color value and the band dimensions
		//if(lpCurrentLong[0] == 0xffffffff)
		if(lpCurrentLong[0] == (int32_t)UINT32_MAX)
		{
			if(SwapInt32BtoN(lpCurrentLong[2]) == (uint32_t)lowpass_width)
			{
				if(SwapInt32BtoN(lpCurrentLong[3]) == (uint32_t)lowpass_height)
				{
					// The whole band is one value; replicate it into both
					// halves of the 32-bit word consumed by the loop below
					solid_color = SwapInt32BtoN(lpCurrentLong[1]);
					solid_color |= (solid_color<<16);
					lpCurrentLong += 4;
				}
			}
		}

		// Decode each row in the lowpass image
		for (row = 0; row < lowpass_height; row++)
		{
			int pixels;	// Most recently loaded 32-bit word (two coefficients)

			// Start at the first column
			column = 0;

			// Process the rest of the row
			{
				for (; column < lowpass_width; column++)
				{
					int pixel_value;
					//int i;

					// Perform inverse quantization
					if(column & 1)
					{
						// Odd column: use the low half saved on the even column
						pixel_value = pixels;
					}
					else
					{
						// Even column: load two coefficients (stream is big endian)
						//pixels = _bswap(*(lpCurrentLong++));
						if(solid_color == -1)
							pixels = SwapInt32BtoN(*(lpCurrentLong++));
						else
							pixels = solid_color;

						// The high half is this column's value; sign extend
						// the low half for the next (odd) column
						pixel_value = (pixels>>16);
						pixels <<= 16;
						pixels >>= 16;
					}

					// Store the pixel in the lowpass band of the wavelet
					pixel_value += channeloffset;
//					pixel_value -= 64;
//					pixel_value += ((rand() & 0x7fff) - 0x4000);
//					if(pixel_value < 0) pixel_value = 0;
					if(pixel_value > 0x7fff) pixel_value = 0x7fff;	// Clamp to the signed 16-bit maximum
					pLowPassRow[column] = pixel_value;

#if (_DEBUG && DUMPLL)
					if(channel==0 && fp)
						fprintf(fp, "%d\n", pixel_value>>7);
#endif
				}
			}

			// Advance to the next row in the lowpass image
			pLowPassRow += lowpass_pitch;
		}

#if (_DEBUG && DUMPLL)
		if(channel == 0 && fp)
			fclose(fp);
#endif

#if ERROR_TOLERANT
		// Update the count of bytes used
		stream->nWordsUsed -= (int)(((intptr_t)lpCurrentLong - (intptr_t)stream->lpCurrentWord));
#endif
		// Update the bitstream
		stream->lpCurrentWord = (uint8_t *)lpCurrentLong;
	}
	else if (bits_per_pixel == 8 && stream->nBitsFree == BITSTREAM_BUFFER_SIZE)
	{
		// 8-bit path: one byte per coefficient, read directly from the buffer
		uint8_t *lpCurrentByte = (uint8_t *)stream->lpCurrentWord;
		//int signval = 0;

		// Decode each row in the lowpass image
		for (row = 0; row < lowpass_height; row++)
		{
			// Start at the first column
			column = 0;

			// Process the rest of the row
			for (; column < lowpass_width; column++)
			{
				int pixel_value = *(lpCurrentByte++);

				// Perform inverse quantization
#if _ENCODE_CHROMA_ZERO
				if (channel == 0)
					pixel_value = (quantization * pixel_value) + offset;
				else
					pixel_value = (pixel_value - offset) * quantization;
#else
				pixel_value = (quantization * pixel_value) + offset;// + colorshift;
#endif
				// Apply the gain about the mid point (gain of 128 together
				// with the shift by 7 below is unity)
				pixel_value -= 128 * quantization;
				pixel_value *= gain;
				pixel_value >>= 7;
				pixel_value += 128 * quantization;
				pixel_value += colorshift;

				// Store the pixel in the lowpass band of the wavelet
				// Multiply by 16 to turn 8-bit into the new 16-bit format
				pLowPassRow[column] = pixel_value * 16;
			}

			// Advance to the next row in the lowpass image
			pLowPassRow += lowpass_pitch;
		}

#if ERROR_TOLERANT
		// Update the count of bytes used
		stream->nWordsUsed -= (int)(((intptr_t)lpCurrentByte - (intptr_t)stream->lpCurrentWord));
#endif
		// Update the bitstream
		stream->lpCurrentWord = (uint8_t *)lpCurrentByte;
	}
	else
	{
		// General path: read each coefficient from the bitstream with GetBits()
		int channeloffset = 0;

		if(decoder->codec.precision == 8)
		{
			channeloffset = (codec->num_frames==2 ? 64 : 32);
		}
		else if(decoder->codec.precision == 10)
		{
			channeloffset = (codec->num_frames==2 ? 10 : 5);
		}
		else if(decoder->codec.precision == 12)
		{
		//	channeloffset = (codec->num_frames==2 ? 4 : 2); // Seems to result in less shift using the viper images
		}

		//DAN20050923 no longer trying to compensate for YUV to RGB issues.
		if(decoder->frame.format == DECODED_FORMAT_RGB24 || decoder->frame.format == DECODED_FORMAT_RGB32)
		{
			if(decoder->codec.precision == 8)
			{
				switch(channel)
				{
				case 0: channeloffset += 8; break; // fixed rounding error introduced by YUV->RGB
				case 1: channeloffset += 16; break;
				case 2: channeloffset += 10; break;
				}
			}
			else if(decoder->codec.precision == 10)
			{
				switch(channel)
				{
				case 0: channeloffset += -8; break; // fixed rounding error introduced by YUV->RGB
				case 1: channeloffset += -4; break;
				case 2: channeloffset += -4; break;
				}
			}
			else if(decoder->codec.precision == 12)
			{
				switch(channel)
				{
				case 0: channeloffset += 0; break; // fixed rounding error introduced by YUV->RGB
				case 1: channeloffset += 0; break;
				case 2: channeloffset += 0; break;
				}
			}
		}

		// The rounding compensation only applies to 16-bit coefficients
		if(bits_per_pixel != 16)
			channeloffset = 0;

		for (row = 0; row < lowpass_height; row++)
		{
			for (column = 0; column < lowpass_width; column++) {
				int pixel_value = GetBits(stream, bits_per_pixel);

				// Perform inverse quantization
#if _ENCODE_CHROMA_ZERO
				if (channel == 0)
					pixel_value = (quantization * pixel_value) + offset;
				else
					pixel_value = (pixel_value - offset) * quantization;
#else
				pixel_value = (quantization * pixel_value) + offset;// + colorshift;
#endif
				// Store the pixel in the lowpass band of the wavelet
				pLowPassRow[column] = SATURATE(pixel_value + channeloffset); // DAN20050926 added chromaoffet to match the normal path -- this code will be used for SD (720) encodes
			}

			stream->nWordsUsed -= lowpass_width*(bits_per_pixel>>3);

			// Advance to the next row in the lowpass image
			pLowPassRow += lowpass_pitch;
		}
	}

	// Set the wavelet scale factor
	wavelet->scale[0] = quantization;

	// Align the bitstream to the next tag value pair
	AlignBitsTag(stream);

	// Return indication of lowpass decoding success
	return result;
}
| // Decode the coefficients in a highpass band | |
| bool DecodeSampleHighPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, int band, int threading) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| //int channel = codec->channel; | |
| //int subband = codec->band.subband; | |
| //int index = codec->highpass.wavelet_number; | |
| int width; | |
| int height; | |
| int quantization; | |
| // The encoder may not have used variable-length coding | |
| int method = codec->band.encoding; | |
| bool result = true; | |
| // Check that the band index is in range | |
| assert(0 <= band && band <= codec->max_subband); | |
| // Encoded coefficients start on a tag boundary | |
| AlignBitsTag(stream); | |
| #if (0 && DEBUG) | |
| // Dump the band header to the logfile | |
| if (logfile) { | |
| fprintf(logfile, | |
| "Band header marker: 0x%04X, subband: %d, width: %d, height: %d, encoding: %d\n", | |
| header->marker, header->subband, header->width, header->height, header->encoding); | |
| } | |
| #endif | |
| // Copy the scale factors used by the encoder into the wavelet band | |
| // (Zero means that the encoder did not supply this parameter) | |
| if (codec->band.scale > 0) { | |
| wavelet->scale[band] = codec->band.scale; | |
| } | |
| // Get the quantization factor that was used to encode the band coefficients | |
| quantization = codec->band.quantization; | |
| // Copy the quantization into the wavelet | |
| wavelet->quantization[band] = quantization; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decode highpass subband: %d, quantization: %d\n", subband, quantization); | |
| } | |
| #endif | |
| // Get the highpass band dimensions | |
| width = codec->band.width; | |
| height = codec->band.height; | |
| // Is this a special band for the temporal high pass thumbnail? | |
| if (method == BAND_ENCODING_LOSSLESS) | |
| { | |
| //lossless temporal subband //DAN20060701 | |
| result = DecodeBand16sLossless(decoder, stream, wavelet, band, width, height); | |
| assert(result); | |
| if (result) | |
| { | |
| // Call thread safe routine to update the band valid flags | |
| UpdateWaveletBandValidFlags(decoder, wavelet, band); | |
| } | |
| } | |
| else if (method == BAND_ENCODING_16BIT) | |
| { | |
| //lossless temporal subband //DAN20060701 | |
| result = DecodeBand16s(decoder, stream, wavelet, band, width, height); | |
| assert(result); | |
| if (result) | |
| { | |
| // Call thread safe routine to update the band valid flags | |
| UpdateWaveletBandValidFlags(decoder, wavelet, band); | |
| } | |
| } | |
| else | |
| { | |
| // Must use the runlength encoding method | |
| assert(codec->band.encoding == BAND_ENCODING_RUNLENGTHS); | |
| #if 0 | |
| // This code attempts to not decode various subbands for 1/4 res decodes. | |
| // Unforuntately playback would stop after 5 seonds with this code (but not in debug mode.) | |
| if (subband >= 4 && subband <= 6) | |
| { | |
| TAGVALUE segment; | |
| AlignBitsTag(stream); | |
| do | |
| { | |
| segment = GetTagValue(stream); | |
| } | |
| while(segment.tuple.tag != CODEC_TAG_BAND_TRAILER); | |
| stream->lpCurrentWord -= 4; | |
| stream->nWordsUsed += 4; | |
| } | |
| else | |
| #elif 0 | |
| // Is this subband required for decoding the frame? | |
| if (CanSkipSubband(decoder, subband)) | |
| { | |
| // Skip past the end of this subband | |
| SkipSubband(stream); | |
| } | |
| #endif | |
| // Decode this subband | |
| result = DecodeFastRunsFSM16s(decoder, stream, wavelet, band, width, height, threading); | |
| } | |
| // Return failure if a problem was encountered while reading the band coefficients | |
| if (!result) return result; | |
| // The encoded band coefficients end on a bitstream word boundary | |
| // to avoid interference with the marker for the coefficient band trailer | |
| AlignBits(stream); | |
| // Decode the band trailer | |
| error = DecodeBandTrailer(stream, NULL); | |
| decoder->error = error; | |
| assert(error == CODEC_ERROR_OKAY); | |
| if (error != CODEC_ERROR_OKAY) { | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Error in band %d trailer: %d\n", band, error); | |
| } | |
| #endif | |
| return false; | |
| } | |
| return result; | |
| } | |
| // Decode an empty band | |
| bool DecodeSampleEmptyBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, int band) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| int quantization; | |
| // Check that the band is in range | |
| assert(0 <= band && band <= CODEC_MAX_HIGHBANDS); | |
| // Check that the highpass band is 16 bits | |
| assert(wavelet->pixel_type[1] == PIXEL_TYPE_16S); | |
| #if (0 && DEBUG) | |
| //TODO: Change format string to handle 64-bit pointers | |
| if (logfile) { | |
| fprintf(logfile, "Start decoding an empty band, stream: 0x%p\n", stream->lpCurrentWord); | |
| } | |
| #endif | |
| // Encoded coefficients must start on a word boundary | |
| AlignBits(stream); | |
| // Copy the scale factors used by the encoder into the wavelet band | |
| // (Zero means that the encoder did not supply the parameter) | |
| if (codec->band.scale > 0) | |
| wavelet->scale[band] = codec->band.scale; | |
| // Set the quantization used to encode the band coefficients | |
| quantization = codec->band.quantization; | |
| wavelet->quantization[band] = quantization; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| DumpBits(stream, logfile); | |
| } | |
| #endif | |
| // Decode the band trailer | |
| error = DecodeBandTrailer(stream, NULL); | |
| decoder->error = error; | |
| assert(error == CODEC_ERROR_OKAY); | |
| if (error != CODEC_ERROR_OKAY) { | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Error in band: %d, error: %d\n", band, error); | |
| } | |
| #endif | |
| return false; | |
| } | |
| // The encoded band coefficients end on a bitstream word boundary | |
| // to avoid interference with the marker for the coefficient band trailer | |
| AlignBits(stream); | |
| #if (0 && DEBUG) | |
| // Dump the band trailer to the logfile | |
| if (logfile) { | |
| fprintf(logfile, "Band trailer marker: 0x%04X\n", trailer->marker); | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| //TODO: Change format string to handle 64-bit pointers | |
| fprintf(logfile, "End decode empty band, stream: 0x%X\n", stream->lpCurrentWord); | |
| } | |
| #endif | |
| return true; | |
| } | |
| bool DecodeBand16s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, | |
| int band_index, int width, int height) | |
| { | |
| PIXEL *rowptr = wavelet->band[band_index]; | |
| int pitch = wavelet->pitch; | |
| int row,dequant = wavelet->quantization[band_index]; | |
| // Convert the pitch from bytes to pixels | |
| pitch /= sizeof(PIXEL); | |
| //BAND_ENCODING_16BIT | |
| if(dequant == 1) | |
| { | |
| for (row = 0; row < height; row++) | |
| { | |
| int column; | |
| #if 0 | |
| for (column = 0; column < width; column++) | |
| { | |
| int value = GetWord16s(stream); | |
| rowptr[column] = value; | |
| } | |
| #else // Mild speedup (2.5% overall half-res decode improvement.) | |
| char *sptr = (char *)stream->lpCurrentWord; | |
| char *dptr = (char *)rowptr; | |
| for (column = 0; column < width; column++) | |
| { | |
| *(dptr+1) = *sptr++; | |
| *dptr = *sptr++; | |
| dptr+=2; | |
| } | |
| stream->lpCurrentWord += width*2; | |
| stream->nWordsUsed += width*2; | |
| #endif | |
| rowptr += pitch; | |
| } | |
| } | |
| else | |
| { | |
| for (row = 0; row < height; row++) | |
| { | |
| int column; | |
| for (column = 0; column < width; column++) | |
| { | |
| int value = GetWord16s(stream); | |
| rowptr[column] = value*dequant; | |
| } | |
| rowptr += pitch; | |
| } | |
| } | |
| #if (0 && DEBUG) | |
| { | |
| int static count = 0; | |
| if (count < 20) { | |
| char label[_MAX_PATH]; | |
| sprintf(label, "Hightemp-decode-%d-", count); | |
| DumpBandPGM(label, wavelet, band_index, NULL); | |
| } | |
| count++; | |
| } | |
| #endif | |
| return true; | |
| } | |
// Decode a highpass band that was entropy coded losslessly with the FSM decoder
//
// The coefficients were split into high and low bytes and coded with the
// finite state machine selected by the active codebook.  After the two-pass
// FSM decode, the coefficients are dequantized in place (a shift for the
// common quant == 32 case, a multiply otherwise).
// Returns false and sets decoder->error to CODEC_ERROR_RUN_DECODE on any
// validation or decode failure.
bool DecodeBand16sLossless(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet,
						   int band_index, int width, int height)
{
	//CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	int result = true;
	int quant = wavelet->quantization[band_index];

	// Get the pointer to the finite state machine
	// NOTE: the FSM is selected by the active codebook BEFORE the codec
	// state is reset below -- the ordering of these statements matters
	FSM *fsm = &decoder->fsm[decoder->codec.active_codebook];
	int size;
	PIXEL *rowptr;
	//int row = 0;
	int pitch;
	//CODEC_STATE *codec = &decoder->codec;
	//int channel = codec->channel;
	//int subband = codec->band.subband;
	//int num_subbands = codec->num_subbands;
	//int pixel_type = wavelet->pixel_type[band_index];
	//int difference_coding = decoder->codec.difference_coding;
	//int localquant = 1;
	//int threading = 0;

	decoder->codec.active_codebook = 0; // reset CODEC state
	decoder->codec.difference_coding = 0; //reset state for next subband

	// Must have a valid wavelet
	assert(wavelet != NULL);
	if (! (wavelet != NULL)) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	//Must have a valid FSM
	assert(fsm != NULL);
	if (! (fsm != NULL)) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	// All rows are treated as one int32_t row that covers the entire band
	size = fsm->table.num_states;
	assert(size > 0);
	if (size == 0) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	// Check if the band is intended for 8-bit pixels
	assert(wavelet->pixel_type[band_index] == PIXEL_TYPE_16S);

	rowptr = (PIXEL *)wavelet->band[band_index];
	pitch = wavelet->pitch;
	assert(rowptr != NULL && pitch != 0);
	if (! (rowptr != NULL && pitch != 0)) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	DeQuantFSM(fsm, 1); // can't use this to dequant as we split the coefficients into high and low bytes.

	// Run the two-pass FSM decode directly into the wavelet band
	if (!DecodeBandFSM16sNoGap2Pass(fsm, stream, (PIXEL16S *)rowptr, width, height, pitch, quant)) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	// Dequantize the decoded coefficients in place
	if(quant)
	{
		int x,y;
		PIXEL *line = rowptr;

		if(quant == 32)
		{
			// Common case: use a shift instead of a multiply
			for(y=0;y<height;y++)
			{
				for(x=0;x<width;x++)
				{
					line[x] <<= 5;
				}
				// Pitch is in bytes; PIXEL entries are two bytes each
				line += pitch/2;
			}
		}
		else
		{
			for(y=0;y<height;y++)
			{
				for(x=0;x<width;x++)
				{
					line[x] *= quant;
				}
				// Pitch is in bytes; PIXEL entries are two bytes each
				line += pitch/2;
			}
		}
	}

/*	if(once <= 60)
	{
		char name[200];
		FILE *fp;
		sprintf(name,"C:/Cedoc/DUMP/Decoder/dump%02d.raw", once);
		fp = fopen(name,"wb");
		fwrite(rowptr,width*height,1,fp);
		fclose(fp);
		once++;
	}*/

	assert(result == true);
	if (! (result == true)) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	return true;
}
// Invert the wavelet to reconstruct the lower wavelet in the transform
//
// Given a fully-decoded wavelet at position 'index' in the channel's
// transform, allocate (or reallocate) the wavelet one level below it and run
// the appropriate inverse transform to reconstruct that wavelet's bands.
// The branch taken depends on the transform type and the wavelet index:
//   - spatial transform, index > 0:  inverse spatial into the lowpass band
//     of wavelet index-1,
//   - index > 3 (above the temporal lowpass): inverse spatial into the
//     lowpass band of the wavelet below,
//   - index == 3 (above the temporal highpass): inverse spatial into the
//     highpass band of the temporal wavelet,
//   - index == 2 (the temporal wavelet): inverse temporal into the two
//     frame wavelets.
// When 'allocations_only' is nonzero only the wavelet (re)allocation is
// performed and the inverse transform itself is skipped.
void ReconstructWaveletBand(DECODER *decoder, TRANSFORM *transform, int channel,
							IMAGE *wavelet, int index, int precision,
							const SCRATCH *scratch, int allocations_only)
{
	int transform_type = transform->type;
	int width = wavelet->width;
	int height = wavelet->height;
	int level = wavelet->level;
	// Scratch space used by the inverse transforms
	PIXEL *buffer = (PIXEL *)scratch->free_ptr;
	size_t buffer_size = scratch->free_size;

	// Is the current wavelet a spatial wavelet?
	if (transform_type == TRANSFORM_TYPE_SPATIAL && index > 0)
	{
		// Reconstruct the lowpass band in the lower wavelet
		int lowpass_index = index - 1;
		IMAGE *lowpass = transform->wavelet[lowpass_index];
		int lowpass_width = 2 * width;
		int lowpass_height = 2 * height;
		int lowpass_level = level - 1;
		int lowpass_type = (lowpass_index == 0) ? WAVELET_TYPE_FRAME : WAVELET_TYPE_SPATIAL;
		//const int prescale = 1;
		// Prescaling is only undone for 10-bit (or deeper) sources
		const bool inverse_prescale = (precision >= CODEC_PRECISION_10BIT);
		int prescale = transform->prescale[index];

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		lowpass = GetWaveletThreadSafe(decoder, transform, lowpass_index,
									   lowpass_width, lowpass_height,
									   lowpass_level, lowpass_type);
#else
		// Allocate the wavelet if not already allocated
#if _ALLOCATOR
		lowpass = ReallocWaveletEx(decoder->allocator, lowpass, lowpass_width, lowpass_height, lowpass_level, lowpass_type);
#else
		lowpass = ReallocWaveletEx(lowpass, lowpass_width, lowpass_height, lowpass_level, lowpass_type);
#endif
		transform->wavelet[lowpass_index] = lowpass;
#endif

		// Check that the lowpass band has not already been reconstructed
		//assert((lowpass->band_valid_flags & BAND_VALID_MASK(0)) == 0);

		if(!allocations_only)
		{
			// Check that all of the wavelet bands have been decoded
			assert(BANDS_ALL_VALID(wavelet));

			// Has this wavelet already been reconstructed?
			if ((lowpass->band_valid_flags & BAND_VALID_MASK(0)) == 0)
			{
				// Perform the inverse spatial transform before decoding the next wavelet
				STOP(tk_decoding);
				START(tk_inverse);
				//TransformInverseSpatialQuantLowpass(wavelet, lowpass, buffer, buffer_size, prescale, inverse_prescale);
				TransformInverseSpatialQuantLowpass(wavelet, lowpass, scratch, prescale, inverse_prescale);
				STOP(tk_inverse);
				START(tk_decoding);

				// Call thread safe routine to update the band valid flags
				UpdateWaveletBandValidFlags(decoder, lowpass, 0);

#if TIMING
				// Increment the count of spatial transforms performed during decoding
				spatial_decoding_count++;
#endif
			}
		}
	}
	// Is the current wavelet a spatial wavelet above the temporal lowpass band?
	else if (index > 3)
	{
		// Reconstruct the lowpass band in the lower wavelet
		const int temporal_wavelet_index = 2;
		// Wavelet 4 sits directly above the temporal wavelet (index 2)
		int lowpass_index = (index > 4) ? index - 1 : index - 2;
		IMAGE *lowpass = transform->wavelet[lowpass_index];
		int lowpass_width = 2 * width;
		int lowpass_height = 2 * height;
		int lowpass_level = level - 1;
		int lowpass_type = ((lowpass_index == temporal_wavelet_index) ? WAVELET_TYPE_TEMPORAL : WAVELET_TYPE_SPATIAL);
		//const int prescale = 2;
		const bool inverse_prescale = (precision >= CODEC_PRECISION_10BIT);
		int prescale = transform->prescale[index];

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		lowpass = GetWaveletThreadSafe(decoder, transform, lowpass_index,
									   lowpass_width, lowpass_height,
									   lowpass_level, lowpass_type);
#else
		// Allocate the wavelet if not already allocated
#if _ALLOCATOR
		lowpass = ReallocWaveletEx(decoder->allocator, lowpass, lowpass_width, lowpass_height, lowpass_level, lowpass_type);
#else
		lowpass = ReallocWaveletEx(lowpass, lowpass_width, lowpass_height, lowpass_level, lowpass_type);
#endif
		transform->wavelet[lowpass_index] = lowpass;
#endif

		if(!allocations_only)
		{
			// Check that the lowpass band has not already been reconstructed
			assert((lowpass->band_valid_flags & BAND_VALID_MASK(0)) == 0);

			// Check that all of the wavelet bands have been decoded
			assert(BANDS_ALL_VALID(wavelet));

			// Perform the inverse spatial transform before decoding the next wavelet
			STOP(tk_decoding);
			START(tk_inverse);
			//TransformInverseSpatialQuantLowpass(wavelet, lowpass, buffer, buffer_size, prescale, inverse_prescale);
			TransformInverseSpatialQuantLowpass(wavelet, lowpass, scratch, prescale, inverse_prescale);
			STOP(tk_inverse);
			START(tk_decoding);

			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandValidFlags(decoder, lowpass, 0);

#if TIMING
			// Increment the count of spatial transforms performed during decoding
			spatial_decoding_count++;
#endif
		}
	}
	// Is the current wavelet the spatial wavelet above the temporal highpass band?
	else if (index == 3)
	{
		// Reconstruct the highpass band in the temporal wavelet
		const int temporal_wavelet_index = 2;
		int highpass_index = index - 1;
		IMAGE *highpass = transform->wavelet[highpass_index];
		int highpass_width = 2 * width;
		int highpass_height = 2 * height;
		int highpass_level = level - 1;
		int highpass_type = ((highpass_index == temporal_wavelet_index) ? WAVELET_TYPE_TEMPORAL : WAVELET_TYPE_SPATIAL);
		const bool inverse_prescale = (precision >= CODEC_PRECISION_10BIT);
		// The highpass path only applies the prescale for deep sources
		int prescale = inverse_prescale ? transform->prescale[index] : 0;

#if _THREADED_DECODER
		// Allocate (or reallocate) the wavelet with thread safety
		highpass = GetWaveletThreadSafe(decoder, transform, highpass_index,
										highpass_width, highpass_height,
										highpass_level, highpass_type);
#else
		// Allocate the wavelet if not already allocated
#if _ALLOCATOR
		highpass = ReallocWaveletEx(decoder->allocator, highpass , highpass_width, highpass_height, highpass_level, highpass_type);
#else
		highpass = ReallocWaveletEx(highpass , highpass_width, highpass_height, highpass_level, highpass_type);
#endif
		transform->wavelet[highpass_index] = highpass;
#endif

		if(!allocations_only)
		{
			// Check that the highpass band has not already been reconstructed
			assert((highpass->band_valid_flags & BAND_VALID_MASK(1)) == 0);

			// Check that all of the wavelet bands have been decoded
			assert(BANDS_ALL_VALID(wavelet));

			// Perform the inverse spatial transform before decoding the next wavelet
			STOP(tk_decoding);
			START(tk_inverse);
			TransformInverseSpatialQuantHighpass(wavelet, highpass, buffer, buffer_size, prescale);
			STOP(tk_inverse);
			START(tk_decoding);

			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandValidFlags(decoder, highpass, 1);

#if TIMING
			// Increment the count of spatial transforms performed during decoding
			spatial_decoding_count++;
#endif
		}
	}
	// Is the current wavelet the temporal wavelet?
	else if (index == 2)
	{
		// Get the temporal wavelet
		IMAGE *temporal = wavelet;

		// Set the frame wavelet parameters
		int frame_level = 1;
		int frame_type = WAVELET_TYPE_FRAME;

		// Get the two frame wavelets
		IMAGE *frame[2];
		frame[0] = transform->wavelet[0];
		frame[1] = transform->wavelet[1];

		// Check that the temporal wavelet is valid
		assert(temporal->num_bands == 2 && temporal->wavelet_type == WAVELET_TYPE_TEMPORAL);

#if _THREADED_DECODER
		// Allocate (or reallocate) the frame wavelets with thread safety
		frame[0] = GetWaveletThreadSafe(decoder, transform, 0, width, height, frame_level, frame_type);
		frame[1] = GetWaveletThreadSafe(decoder, transform, 1, width, height, frame_level, frame_type);
#else
		// Allocate the frame wavelets if not already allocated
#if _ALLOCATOR
		frame[0] = ReallocWaveletEx(decoder->allocator, frame[0], width, height, frame_level, frame_type);
		frame[1] = ReallocWaveletEx(decoder->allocator, frame[1], width, height, frame_level, frame_type);
#else
		frame[0] = ReallocWaveletEx(frame[0], width, height, frame_level, frame_type);
		frame[1] = ReallocWaveletEx(frame[1], width, height, frame_level, frame_type);
#endif
		transform->wavelet[0] = frame[0];
		transform->wavelet[1] = frame[1];
#endif

#if (0 && DEBUG)
		if (logfile) {
			fprintf(logfile, "Before inverse temporal transform");
			DumpArray16s("Temporal Lowpass", temporal->band[0], temporal->width, temporal->height, temporal->pitch, logfile);
			DumpArray16s("Temporal Highpass", temporal->band[1], temporal->width, temporal->height, temporal->pitch, logfile);
		}
#endif

		if(!allocations_only)
		{
			// Check that the lowpass bands have not already been reconstructed
			assert((frame[0]->band_valid_flags & BAND_VALID_MASK(0)) == 0);
			assert((frame[1]->band_valid_flags & BAND_VALID_MASK(0)) == 0);

			// Check that all of the wavelet bands have been decoded
			assert(BANDS_ALL_VALID(temporal));

			// Invert the temporal transform between the frame wavelets
			STOP(tk_decoding);
			START(tk_inverse);
			TransformInverseTemporalQuant(temporal, frame[0], frame[1], buffer, buffer_size, precision);
			STOP(tk_inverse);
			START(tk_decoding);

#if (0 && DEBUG)
			if (logfile) {
				IMAGE *wavelet = quad[0];
				fprintf(logfile, "After inverse temporal transform\n");
				DumpArray16s("Temporal Lowpass", temporal->band[0], temporal->width, temporal->height, temporal->pitch, logfile);
				DumpArray16s("Temporal Highpass", temporal->band[1], temporal->width, temporal->height, temporal->pitch, logfile);
				DumpArray16s("First frame wavelet, band 0", wavelet->band[0], wavelet->width, wavelet->height, wavelet->pitch, logfile);
			}
#endif

			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandValidFlags(decoder, frame[0], 0);
			UpdateWaveletBandValidFlags(decoder, frame[1], 0);

#if TIMING
			// Increment the number of temporal transforms performed outside of decoding
			temporal_decoding_count++;
#endif
		}
	}
}
| // Compute the dimensions of the output buffer | |
| void ComputeOutputDimensions(DECODER *decoder, int frame, | |
| int *decoded_width_out, int *decoded_height_out) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| FRAME_INFO *info = &decoder->frame; | |
| //int progressive = codec->progressive; | |
| TRANSFORM **transform_array = decoder->transform; | |
| //IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| IMAGE *wavelet = NULL; | |
| int wavelet_width; | |
| int wavelet_height; | |
| int decoded_width; | |
| int decoded_height; | |
| int resolution = info->resolution; | |
| //int chroma_offset = decoder->codec.chroma_offset; | |
| int decoded_scale = 0; | |
| if (decoded_width_out == NULL || decoded_height_out == NULL) { | |
| return; | |
| } | |
| // Clear the return values in case this routine terminates early | |
| *decoded_width_out = 0; | |
| *decoded_height_out = 0; | |
| // Get the decoding scale | |
| switch(resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL: | |
| #if DEBUG | |
| assert(AllTransformBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| decoded_scale = 2; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| break; | |
| case DECODED_RESOLUTION_HALF: | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| } | |
| else | |
| { | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[3]; | |
| } | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[5]; | |
| if(wavelet == NULL) // there Intra Frame compressed | |
| wavelet = transform_array[0]->wavelet[2]; | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| if(resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| decoded_width = wavelet_width; | |
| else | |
| decoded_width = decoded_scale * wavelet_width; | |
| decoded_height = decoded_scale * wavelet_height; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoded scale: %d, decoded width: %d, wavelet width: %d\n", decoded_scale, decoded_width, wavelet_width); | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoded width: %d, height: %d, frame width: %d, height: %d, output pitch: %d\n", | |
| decoded_width, decoded_height, info->width, info->height, pitch); | |
| } | |
| #endif | |
| // Return the decoded width and height | |
| *decoded_width_out = decoded_width; | |
| *decoded_height_out = decoded_height; | |
| } | |
| #define DEBUG_ROW16U 0 | |
| void ReconstructSampleFrameToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) | |
| { | |
| FRAME_INFO local_info; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| FRAME_INFO *info = &local_info; | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| int progressive = codec->progressive; | |
| TRANSFORM **transform_array = decoder->transform; | |
| IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| IMAGE *wavelet; | |
| int wavelet_width; | |
| int wavelet_height; | |
| int decoded_width; | |
| int decoded_height; | |
| int resolution = decoder->frame.resolution; | |
| int chroma_offset = decoder->codec.chroma_offset; | |
| int uncompressed = decoder->uncompressed_chunk && decoder->uncompressed_size && decoder->sample_uncompressed; | |
| //TODO: Change this routine to return the codec error code | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| //if(decoder->cfhddata.calibration) | |
| // LoadTweak(); | |
| //TODO: Change this routine to return an error code | |
| if (decoder == NULL) { | |
| return; | |
| } | |
| decoder->gop_frame_num = frame; | |
| #if _THREADED_DECODER | |
| // Wait until the transform thread has finished all pending transforms | |
| WaitForTransformThread(decoder); | |
| #endif | |
| //return; | |
| // copy frame info in a changable local structure | |
| memcpy(info, &decoder->frame, sizeof(FRAME_INFO)); | |
| // Use the old code for reconstructing the frame | |
| #if (0 && DEBUG) | |
| // Force quarter resolution decoding for debugging that feature | |
| resolution = DECODED_RESOLUTION_QUARTER; | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Inverting last wavelet, frame: %d\n", frame); | |
| } | |
| #endif | |
| // The decoder can decode a video sample without returning a frame | |
| if (output == NULL || pitch == 0) return; | |
| #if (1 && DEBUG_ROW16U) | |
| // Force decoding to 16-bit pixels for debugging | |
| info->format = DECODED_FORMAT_YR16; | |
| #endif | |
| #if 0 | |
| if (info->format == DECODED_FORMAT_YR16) | |
| { | |
| // Force interlaced or progressive decoding for debugging | |
| //progressive = false; | |
| progressive = true; | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoder flags: 0x%p\n", decoder->flags); | |
| } | |
| #endif | |
| // Does this frame have to be reconstructed? | |
| if ((decoder->flags & DECODER_FLAGS_RENDER) == 0) { | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoder discarding frame: %d\n", frame); | |
| } | |
| #endif | |
| return; | |
| } | |
| // Check that the requested frame is within the limits of the group of frames | |
| assert(0 <= frame && frame < decoder->gop_length); | |
| // Check that the frame resolution is valid | |
| assert(IsValidFrameResolution(resolution)); | |
| if (!IsValidFrameResolution(resolution)) { | |
| decoder->error = CODEC_ERROR_RESOLUTION; | |
| return; | |
| } | |
| #if (0 && TIMING) //(0 && DEBUG) | |
| // Override progressive flag read from the bitstream for debugging | |
| //progressive = 0; // Use the inverse frame transform | |
| progressive = 1; // Use the inverse spatial transform | |
| #endif | |
| // Build the 3D LUTs if needed | |
| ComputeCube(decoder); | |
| //HACK DAN20110131 -- some formats will not directly decode so need to use the AM route | |
| { | |
| if( decoder->codec.encoded_format == ENCODED_FORMAT_YUV_422 && | |
| resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| if( decoder->frame.format == COLOR_FORMAT_R408 || | |
| decoder->frame.format == COLOR_FORMAT_V408) | |
| { | |
| decoder->use_active_metadata_decoder = true; | |
| decoder->apply_color_active_metadata = true; | |
| } | |
| } | |
| if( decoder->frame.format == COLOR_FORMAT_NV12) | |
| { | |
| decoder->use_active_metadata_decoder = true; | |
| decoder->apply_color_active_metadata = true; // TODO, make it work with this. | |
| } | |
| if (decoder->codec.progressive == false && decoder->frame.format == COLOR_FORMAT_RGB24) | |
| { | |
| decoder->use_active_metadata_decoder = true; | |
| decoder->apply_color_active_metadata = true; | |
| } | |
| } | |
| // Get the decoding scale | |
| if(!uncompressed) | |
| { | |
| switch(resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| #if DEBUG | |
| assert(AllTransformBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| wavelet = transform_array[0]->wavelet[0]; | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = 2 * wavelet_width; | |
| decoded_height = 2 * wavelet_height; | |
| break; | |
| case DECODED_RESOLUTION_HALF: | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| wavelet = transform_array[0]->wavelet[0]; | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = wavelet_width; | |
| decoded_height = wavelet_height; | |
| break; | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL: | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| wavelet = transform_array[0]->wavelet[0]; | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = wavelet_width; | |
| decoded_height = 2 * wavelet_height; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame)); | |
| #endif | |
| wavelet = transform_array[0]->wavelet[0]; | |
| } | |
| else | |
| { | |
| wavelet = transform_array[0]->wavelet[3]; | |
| } | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = wavelet_width; | |
| decoded_height = wavelet_height; | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| wavelet = transform_array[0]->wavelet[5]; | |
| if(wavelet == NULL) // there Intra Frame compressed | |
| wavelet = transform_array[0]->wavelet[2]; | |
| // Get the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = wavelet_width; | |
| decoded_height = wavelet_height; | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| decoded_width = info->width/2; | |
| decoded_height = info->height/2; | |
| } | |
| else | |
| { | |
| decoded_width = info->width; | |
| decoded_height = info->height; | |
| } | |
| } | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| if(resolution == DECODED_RESOLUTION_FULL) | |
| { | |
| if(decoded_width*2 == info->width) | |
| { | |
| info->width /= 2; | |
| info->height /= 2; | |
| info->resolution = resolution = DECODED_RESOLUTION_FULL_DEBAYER; | |
| } | |
| } | |
| else if(resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) | |
| { | |
| if(decoded_width*2 == info->width) | |
| { | |
| info->width /= 2; | |
| info->height /= 2; | |
| } | |
| } | |
| else if(resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| { | |
| if(decoded_width*2 == info->width) | |
| { | |
| info->height /= 2; | |
| info->resolution = resolution = DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER; | |
| } | |
| } | |
| else if(decoder->frame.format == DECODED_FORMAT_BYR2 || decoder->frame.format == DECODED_FORMAT_BYR4) | |
| { | |
| if(decoded_width*2 == info->width) | |
| { | |
| info->width /= 2; | |
| info->height /= 2; | |
| info->resolution = resolution = DECODED_RESOLUTION_HALF_NODEBAYER; | |
| } | |
| } | |
| else | |
| { | |
| if(resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| if(decoded_width*2 == info->width) | |
| { | |
| decoded_width *= 2; | |
| decoded_height *= 2; | |
| info->resolution = resolution = DECODED_RESOLUTION_FULL; | |
| } | |
| } | |
| else if(resolution == DECODED_RESOLUTION_QUARTER) | |
| { | |
| if(uncompressed) | |
| { | |
| decoded_width *= 2; | |
| decoded_height *= 2; | |
| info->resolution = resolution = DECODED_RESOLUTION_QUARTER_NODEBAYER_SCALED; | |
| } | |
| else | |
| { | |
| if(decoded_width == info->width) | |
| { | |
| info->resolution = resolution = DECODED_RESOLUTION_HALF; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| if(uncompressed) | |
| { | |
| // Call the appropriate routine for the encoded format | |
| switch (decoder->codec.encoded_format) | |
| { | |
| case ENCODED_FORMAT_YUVA_4444: // Four planes of YUVA 4:4:4:4 | |
| // Not implemented | |
| assert(0); | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| case ENCODED_FORMAT_BAYER: // Bayer encoded data | |
| // Add new code here for the final steps in decoding the Bayer format | |
| error = UncompressedSampleFrameBayerToBuffer(decoder, info, frame, output, pitch); | |
| break; | |
| case ENCODED_FORMAT_YUV_422: // Original encoding scheme for YUV 4:2:2 (always v210) | |
| error = UncompressedSampleFrameYUVToBuffer(decoder, info, frame, output, pitch);//CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| case ENCODED_FORMAT_RGB_444: // Original encoding scheme for RGB 444 (always DPX0) | |
| error = UncompressedSampleFrameRGBToBuffer(decoder, info, frame, output, pitch);//CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| default: | |
| // Fall through into the old code for reconstructing frames | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| // Call the appropriate routine for the encoded format | |
| switch (decoder->codec.encoded_format) | |
| { | |
| case ENCODED_FORMAT_RGB_444: // channels = decoder->codec.num_channels; planes of RGB 4:4:4 | |
| case ENCODED_FORMAT_RGBA_4444: // Four planes of ARGB 4:4:4:4 | |
| error = ReconstructSampleFrameRGB444ToBuffer(decoder, frame, output, pitch); | |
| break; | |
| case ENCODED_FORMAT_YUVA_4444: // Four planes of YUVA 4:4:4:4 | |
| // Not implemented | |
| assert(0); | |
| //error = ReconstructSampleFrameYUVA4444ToBuffer(decoder, frame, output, pitch); | |
| break; | |
| case ENCODED_FORMAT_BAYER: // Bayer encoded data | |
| // Add new code here for the final steps in decoding the Bayer format | |
| error = ReconstructSampleFrameBayerToBuffer(decoder, info, frame, output, pitch); | |
| break; | |
| case ENCODED_FORMAT_YUV_422: // Original encoding scheme for YUV 4:2:2 | |
| // Add new code here for the final steps in decoding the original YUV 4:2:2 format | |
| error = ReconstructSampleFrameYUV422ToBuffer(decoder, frame, output, pitch); | |
| break; | |
| default: | |
| // Fall through into the old code for reconstructing frames | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| } | |
| } | |
| // Was the newer code able to successfully reconstruct the frame? | |
| if (error != CODEC_ERROR_UNSUPPORTED_FORMAT) | |
| { | |
| // Save the codec error code in the decoder state and return | |
| decoder->error = error; | |
| return; | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoded scale: %d, decoded width: %d, wavelet width: %d\n", decoded_scale, decoded_width, wavelet_width); | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoded width: %d, height: %d, frame width: %d, height: %d, output pitch: %d\n", | |
| decoded_width, decoded_height, info->width, info->height, pitch); | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| IMAGE *wavelet = transform[0]->wavelet[frame]; | |
| int band = 0; | |
| fprintf(logfile, "Luminance wavelet, frame: %d, band: %d\n", frame, band); | |
| DumpArray16s("Lowpass Band", wavelet->band[band], wavelet->width, wavelet->height, wavelet->pitch, logfile); | |
| } | |
| #endif | |
| // Check that the requested frame is large enough to hold the decoded frame | |
| #if (0 && DEBUG) | |
| //if (! (info->width >= decoded_width)) | |
| { | |
| if (logfile) { | |
| //fprintf(logfile, "Requested frame not large enough to hold decoded frame: %d < %d\n", info->width, decoded_width); | |
| fprintf(logfile, "Output frame width: %d, decoded frame width: %d\n", info->width, decoded_width); | |
| } | |
| } | |
| #endif | |
| assert(info->width >= decoded_width); | |
| assert((info->height+7)/8 >= (decoded_height+7)/8); | |
| if (!(info->width >= decoded_width && (info->height+7)/8 >= (decoded_height+7)/8)) { | |
| decoder->error = CODEC_ERROR_FRAMESIZE; | |
| return; | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| //SUBIMAGE subimage = SUBIMAGE_UPPER_LEFT(16, 16); | |
| SUBIMAGE subimage = SUBIMAGE_UPPER_RIGHT(16, 16); | |
| // Adjust the subimage to be at the middle of the right border | |
| //subimage.row += wavelet_height/2 - 8; | |
| DumpBand("SIF Image", wavelet, 0, &subimage, logfile); | |
| } | |
| #endif | |
| START(tk_inverse); | |
| if (resolution == DECODED_RESOLUTION_QUARTER) | |
| { | |
| int precision = codec->precision; | |
| // Reconstruct the frame to quarter resolution | |
| ReconstructQuarterFrame(decoder, num_channels, frame, output, pitch, | |
| info, &decoder->scratch, precision); | |
| } | |
| else | |
| // Was the first transform a frame transform (used for interlaced frames)? | |
| if (!progressive) | |
| { | |
| // Can the inverse frame transform and output byte packing be done in one pass? | |
| if ((resolution == DECODED_RESOLUTION_FULL) && | |
| (info->format == DECODED_FORMAT_YUYV || info->format == DECODED_FORMAT_UYVY)) | |
| { | |
| // Apply the inverse frame transform and pack the results into the output buffer | |
| int precision = codec->precision; | |
| #if (0 && DEBUG) | |
| DumpWaveletBandsPGM(wavelet, frame, num_channels); | |
| #endif | |
| #if _INTERLACED_WORKER_THREADS | |
| StartInterlaceWorkerThreads(decoder); | |
| //TODO: support new threading | |
| // Send the upper and lower rows of the transforms to the worker threads | |
| TransformInverseFrameThreadedToYUV(decoder, frame, num_channels, output, pitch, | |
| info, chroma_offset, precision); | |
| #else | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| TransformInverseFrameToYUV(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| //#if BUILD_PROSPECT | |
| else if (resolution == DECODED_RESOLUTION_FULL && info->format == DECODED_FORMAT_YR16) | |
| { | |
| // Apply the inverse frame transform and output rows of luma and chroma | |
| //DWORD dwThreadID1; | |
| //DWORD dwThreadID2; | |
| //HANDLE thread1; | |
| //HANDLE thread2; | |
| int precision = codec->precision; | |
| #if _INTERLACED_WORKER_THREADS | |
| StartInterlaceWorkerThreads(decoder); | |
| //TODO: support new threading | |
| // Send the upper and lower rows of the transforms to the worker threads | |
| TransformInverseFrameThreadedToRow16u(decoder, frame, num_channels, | |
| (PIXEL16U *)output, pitch, | |
| info, chroma_offset, precision); | |
| #else | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| TransformInverseFrameToRow16u(decoder, transform_array, frame, num_channels, | |
| (PIXEL16U *)output, pitch, info, | |
| &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| //#endif | |
| else | |
| { | |
| // Reconstruct the frame as separate planes and combine the planes into a packed output image | |
| int channel; | |
| if (resolution == DECODED_RESOLUTION_LOWPASS_ONLY) | |
| { | |
| int scale = 13; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[5]; | |
| if(lowpass_images[channel] == NULL) // therefore IntreFrame compressed. | |
| { | |
| scale = 12; | |
| lowpass_images[channel] = transform_array[channel]->wavelet[2]; | |
| } | |
| } | |
| STOP(tk_inverse); | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, info, chroma_offset, | |
| scale, decoder->codec.encoded_format, decoder->frame.white_point); | |
| START(tk_inverse); | |
| } | |
| else | |
| // In SIF resolution, no need to reconstruct the bottom-level wavelet transforms | |
| // Just copy the lowpass images directly into output frame | |
| if (resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| int precision = codec->precision; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[frame]; | |
| } | |
| STOP(tk_inverse); | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, info, chroma_offset, | |
| precision, decoder->codec.encoded_format, decoder->frame.white_point); | |
| START(tk_inverse); | |
| } | |
| // In full resolution, reconstruct the frame wavelet and | |
| // convert the YUYV output to the specified color format | |
| else | |
| { | |
| int precision = codec->precision; | |
| TransformInverseFrameToBuffer(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| } | |
| } | |
| } | |
| else // The first transform was a spatial transform (used for progressive frames) | |
| { | |
| // Can the inverse frame transform and output byte packing be done in one pass? | |
| if ((resolution == DECODED_RESOLUTION_FULL) && | |
| (info->format == DECODED_FORMAT_YUYV || info->format == DECODED_FORMAT_UYVY) && // Output YUV | |
| decoder->thread_cntrl.capabilities & _CPU_FEATURE_SSE2) | |
| { | |
| int precision = codec->precision; | |
| //DWORD dwThreadID1; | |
| //DWORD dwThreadID2; | |
| //HANDLE thread1; | |
| //HANDLE thread2; | |
| // Apply the inverse frame transform and pack the results into the output buffer | |
| #if _THREADED | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| uint8_t *pixoutput = output; | |
| if(decoder->use_active_metadata_decoder) //WIP | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| pixoutput, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sBayerThruLUT); | |
| } | |
| else | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| pixoutput, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sToBayerYUV); | |
| } | |
| } | |
| else if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2YUV); | |
| } | |
| else | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sToYUV); | |
| } | |
| #else | |
| //TODO : Accelerated BAYER for single thread decoding. | |
| assert(0); | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| //TransformInverseSpatialToYUV(decoder, transform_array, frame, num_channels, output, pitch, info, | |
| // &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| else if ((resolution == DECODED_RESOLUTION_FULL) && decoder->codec.encoded_format == ENCODED_FORMAT_BAYER && | |
| (info->format == DECODED_FORMAT_RGB24 || info->format == DECODED_FORMAT_RGB32) && // Output RGB | |
| decoder->thread_cntrl.capabilities & _CPU_FEATURE_SSE2 && decoder->use_active_metadata_decoder) | |
| { | |
| int precision = codec->precision; | |
| //DWORD dwThreadID1; | |
| //DWORD dwThreadID2; | |
| //HANDLE thread1; | |
| //HANDLE thread2; | |
| // Apply the inverse frame transform and pack the results into the output buffer | |
| #if _THREADED | |
| { | |
| uint8_t *pixoutput = output; | |
| if(info->format == DECODED_FORMAT_RGB24 || info->format == DECODED_FORMAT_RGB32) | |
| { | |
| pixoutput += (info->height-1)*pitch; | |
| pitch = -pitch; | |
| } | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| pixoutput, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sBayerThruLUT); | |
| } | |
| #endif | |
| } | |
| //#if BUILD_PROSPECT | |
| else if (resolution == DECODED_RESOLUTION_FULL && info->format == DECODED_FORMAT_YR16) | |
| { | |
| // Apply the inverse frame transform and output rows of luma and chroma | |
| int precision = codec->precision; | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, num_channels, | |
| (uint8_t *)output, pitch, | |
| info, chroma_offset, precision); | |
| #else | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| (PIXEL16U *)output, pitch, info, | |
| &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| //#endif | |
| else | |
| { | |
| // Reconstruct the frame as separate planes and combine the planes into a packed output image | |
| int channel; | |
| if (resolution == DECODED_RESOLUTION_LOWPASS_ONLY) | |
| { | |
| //int precision = codec->precision; | |
| int scale = 13; | |
| //DAN20081203 -- fix for 444 decodes in AE32-bit float | |
| decoder->frame.white_point = 16; | |
| //decoder->frame.signed_pixels = 0; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[5]; | |
| if(lowpass_images[channel] == NULL) // therefore IntreFrame compressed. | |
| { | |
| scale = 12; | |
| lowpass_images[channel] = transform_array[channel]->wavelet[2]; | |
| } | |
| } | |
| STOP(tk_inverse); | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, info, chroma_offset, | |
| scale, decoder->codec.encoded_format, decoder->frame.white_point); | |
| START(tk_inverse); | |
| } | |
| else | |
| // In SIF resolution, no need to reconstruct the bottom-level wavelet transforms | |
| // Just copy the lowpass images directly into output frame | |
| if (resolution == DECODED_RESOLUTION_HALF || resolution == DECODED_RESOLUTION_HALF_NODEBAYER)// || resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) | |
| { | |
| int precision = codec->precision; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[frame]; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| char label[_MAX_PATH]; | |
| char *format = decoded_format_string[info->format]; | |
| sprintf(label, "Output, channel: %d, format: %s", channel, format); | |
| DumpImageStatistics(label, lowpass_images[channel], logfile); | |
| } | |
| #endif | |
| } | |
| STOP(tk_inverse); | |
| #if 1 //|| BAYER_SUPPORT | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| #if _THREADED | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| #else | |
| //unsigned short scanline[4096*3],*sptr; | |
| //unsigned short scanline2[4096*3],*sptr2; | |
| unsigned short *scanline,*sptr; | |
| unsigned short *scanline2,*sptr2; | |
| char *buffer = decoder->scratch.free_ptr; | |
| size_t buffer_size = decoder->scratch.free_size; | |
| IMAGE *g_image = lowpass_images[0]; | |
| IMAGE *rg_image = lowpass_images[1]; | |
| IMAGE *bg_image = lowpass_images[2]; | |
| IMAGE *gd_image = lowpass_images[3]; | |
| uint8_t *outyuv,*line = output; | |
| PIXEL *bayer_line, *bayerptr; | |
| PIXEL *G,*RG,*BG,*GD; | |
| int x,y; | |
| int bayer_pitch = info->width*4; | |
| int format = info->format; | |
| bool inverted = false; | |
| int maxbound = 4095; //10-bit source | |
| int midpoint = 32768>>3; | |
| int shift = 4; | |
| if(precision == 12) | |
| { | |
| maxbound = 16383; | |
| midpoint = 32768>>1; | |
| shift = 2; | |
| } | |
| if(buffer_size < info->width * 2 * 3 * 2) | |
| assert(0); // not enough memory | |
| if (format == DECODED_FORMAT_RGB24 || format == DECODED_FORMAT_RGB32) | |
| { | |
| inverted = true; | |
| line += (info->height-1)*pitch; | |
| pitch = -pitch; | |
| } | |
| scanline = (unsigned short *)buffer; | |
| buffer += info->width * 2 * 3; | |
| scanline2 = (unsigned short *)buffer; | |
| G = g_image->band[0]; | |
| RG = rg_image->band[0]; | |
| BG = bg_image->band[0]; | |
| for(y=0; y<info->height; y++) | |
| { | |
| uint8_t *newline = line; | |
| PIXEL *newG=G,*newRG=RG,*newBG=BG; | |
| PIXEL *gptr,*rgptr,*bgptr,*gdptr; | |
| int r,g,b,rg,bg,y1,y2,u,v; | |
| int r1,g1,b1; | |
| int i; | |
| newline += pitch*y; | |
| newG += y * (g_image->pitch / sizeof(PIXEL)); | |
| newRG += y * (rg_image->pitch / sizeof(PIXEL)); | |
| newBG += y * (bg_image->pitch / sizeof(PIXEL)); | |
| gptr = newG; | |
| rgptr = newRG; | |
| bgptr = newBG; | |
| sptr = scanline; | |
| for(x=0; x<info->width; x++) | |
| { | |
| g = (*gptr++); | |
| if(g > maxbound) g = maxbound; | |
| rg = (*rgptr++); | |
| bg = (*bgptr++); | |
| r = (rg<<1) - midpoint + g; | |
| b = (bg<<1) - midpoint + g; | |
| if(r > maxbound) r = maxbound; | |
| if(b > maxbound) b = maxbound; | |
| if(r < 0) r = 0; | |
| if(g < 0) g = 0; | |
| if(b < 0) b = 0; | |
| *sptr++ = r<<shift; | |
| *sptr++ = g<<shift; | |
| *sptr++ = b<<shift; | |
| } | |
| { | |
| int flags = 0; | |
| int whitebitdepth = 16; | |
| sptr = scanline; | |
| if(decoder->apply_color_active_metadata) | |
| sptr = ApplyActiveMetaData(decoder, info->width, 1, y, scanline, scanline2, | |
| info->format, &whitebitdepth, &flags); | |
| ConvertLinesToOutput(decoder, info->width, 1, sptr, | |
| newline, y, pitch, | |
| info->format, whitebitdepth, flags); | |
| } | |
| } | |
| #endif | |
| } | |
| else if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| IMAGE *g_image = lowpass_images[0]; | |
| IMAGE *rg_image = lowpass_images[1]; | |
| IMAGE *bg_image = lowpass_images[2]; | |
| uint8_t *line = output; | |
| unsigned char *rgb8; | |
| PIXEL *G,*RG,*BG; | |
| int x,y; | |
| G = g_image->band[0]; | |
| RG = rg_image->band[0]; | |
| BG = bg_image->band[0]; | |
| if(info->format == DECODED_FORMAT_RGB32) | |
| { | |
| line = output; | |
| line += (info->height-1) * pitch; | |
| for(y=0; y<info->height; y++) | |
| { | |
| PIXEL *gptr,*rgptr,*bgptr; | |
| int r,g,b; | |
| int i,noisearray[32]; | |
| for(i=0; i<32; i++) | |
| { | |
| noisearray[i] = (rand() & 63); | |
| } | |
| gptr = G; | |
| rgptr = RG; | |
| bgptr = BG; | |
| rgb8 = (unsigned char *)line; | |
| for(x=0; x<info->width; x++) | |
| { | |
| int rnd = noisearray[x&31]; | |
| g = ((*gptr++) + rnd) >> 6; | |
| r = ((*rgptr++) + rnd) >> 6; | |
| b = ((*bgptr++) + rnd) >> 6; | |
| if(r < 0) r=0; if(r > 255) r=255; | |
| if(g < 0) g=0; if(g > 255) g=255; | |
| if(b < 0) b=0; if(b > 255) b=255; | |
| *rgb8++ = b; | |
| *rgb8++ = g; | |
| *rgb8++ = r; | |
| *rgb8++ = 255; | |
| } | |
| line -= pitch; | |
| G += g_image->pitch / sizeof(PIXEL); | |
| RG += rg_image->pitch / sizeof(PIXEL); | |
| BG += bg_image->pitch / sizeof(PIXEL); | |
| } | |
| } | |
| else if(info->format == DECODED_FORMAT_RGB24) | |
| { | |
| line = output; | |
| line += (info->height-1) * pitch; | |
| for(y=0; y<info->height; y++) | |
| { | |
| PIXEL *gptr,*rgptr,*bgptr; | |
| int r,g,b; | |
| int i,noisearray[32]; | |
| for(i=0; i<32; i++) | |
| { | |
| noisearray[i] = (rand() & 63); | |
| } | |
| gptr = G; | |
| rgptr = RG; | |
| bgptr = BG; | |
| rgb8 = (unsigned char *)line; | |
| for(x=0; x<info->width; x++) | |
| { | |
| int rnd = noisearray[x&31]; | |
| g = ((*gptr++) + rnd) >> 6; | |
| r = ((*rgptr++) + rnd) >> 6; | |
| b = ((*bgptr++) + rnd) >> 6; | |
| if(r < 0) r=0; if(r > 255) r=255; | |
| if(g < 0) g=0; if(g > 255) g=255; | |
| if(b < 0) b=0; if(b > 255) b=255; | |
| *rgb8++ = b; | |
| *rgb8++ = g; | |
| *rgb8++ = r; | |
| } | |
| line -= pitch; | |
| G += g_image->pitch / sizeof(PIXEL); | |
| RG += rg_image->pitch / sizeof(PIXEL); | |
| BG += bg_image->pitch / sizeof(PIXEL); | |
| } | |
| } | |
| } | |
| else | |
| #endif | |
| { | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, info, chroma_offset, | |
| precision, decoder->codec.encoded_format, decoder->frame.white_point); | |
| } | |
| START(tk_inverse); | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| char label[_MAX_PATH]; | |
| int width = info->width; | |
| int height = info->height; | |
| sprintf(label, "Output"); | |
| DumpBufferStatistics(label, output, width, height, pitch, logfile); | |
| } | |
| #endif | |
| } | |
| // In full resolution, reconstruct the frame wavelet and | |
| // convert the YUYV output to the specified color format | |
| else | |
| { | |
| // Handle inversion of the output image in this routine | |
| FRAME_INFO info2; | |
| int format; | |
| bool inverted = false; | |
| int precision = codec->precision; | |
| memcpy(&info2, info, sizeof(FRAME_INFO)); | |
| format = info2.format; | |
| if (format == DECODED_FORMAT_RGB24) { | |
| format = DECODED_FORMAT_RGB24_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| else if (format == DECODED_FORMAT_RGB32) { | |
| format = DECODED_FORMAT_RGB32_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| // Have the output location and pitch been inverted? | |
| if (inverted && pitch > 0) { | |
| int height = info->height; | |
| if(resolution == DECODED_RESOLUTION_FULL_DEBAYER) | |
| height *= 2; | |
| output += (height - 1) * pitch; // Start at the bottom row | |
| pitch = NEG(pitch); // Negate the pitch to go up | |
| } | |
| //#if BUILD_PROSPECT | |
| // Output the frame in V210 format? | |
| if( (format == DECODED_FORMAT_V210 || | |
| format == DECODED_FORMAT_YU64) && | |
| decoder->codec.encoded_format != ENCODED_FORMAT_BAYER ) | |
| { | |
| //char *buffer = decoder->buffer; | |
| //size_t buffer_size = decoder->buffer_size; | |
| int precision = codec->precision; | |
| // The output buffer is an array of 10-bit pixels packed into double words | |
| #if 0 | |
| TransformInverseSpatialToV210(transform_array, frame, num_channels, output, pitch, &info2, | |
| buffer, buffer_size, chroma_offset, decoder->codec.precision); | |
| #else | |
| TransformInverseSpatialToV210(transform_array, frame, num_channels, output, pitch, | |
| &info2, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| else | |
| //#endif | |
| // Decoding a full resolution progressive frame to a Bayer output format? | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| //char *buffer = decoder->buffer; | |
| //size_t buffer_size = decoder->buffer_size; | |
| int precision = codec->precision; | |
| // PIXEL16U *RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(info->width*decoded_height*4*sizeof(PIXEL), 16); | |
| if(decoder->RawBayer16 == NULL) | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| size_t size = info->width*decoded_height*4*sizeof(PIXEL); | |
| decoder->RawBayer16 = | |
| (PIXEL16U *)AllocAligned(allocator, size, 16); | |
| #else | |
| decoder->RawBayer16 = | |
| (PIXEL16U *)MEMORY_ALIGNED_ALLOC(info->width*decoded_height*4*sizeof(PIXEL), 16); | |
| #endif | |
| decoder->RawBayerSize = info->width*decoded_height*4*sizeof(PIXEL); | |
| } | |
| //TODO: Replace this memory allocation with a scratch buffer allocation | |
| //#ifdef SHARPENING | |
| if(decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| size_t size = info->width*decoded_height*4*3*sizeof(PIXEL); | |
| decoder->RGBFilterBuffer16 = | |
| (PIXEL16U *)AllocAligned(allocator, size, 16); | |
| #else | |
| decoder->RGBFilterBuffer16 = | |
| (PIXEL16U *)MEMORY_ALIGNED_ALLOC(info->width*decoded_height*4*3*sizeof(PIXEL), 16); | |
| #endif | |
| decoder->RGBFilterBufferSize = info->width*decoded_height*4*3*sizeof(PIXEL); | |
| } | |
| //#endif | |
| if(decoder->RawBayer16 == NULL || decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| decoder->error = CODEC_ERROR_MEMORY_ALLOC; | |
| return; | |
| } | |
| if(decoder->RawBayer16) | |
| { | |
| uint8_t *line; | |
| PIXEL16U *bayer_line, *bayerptr, *outA16, *outB16; | |
| PIXEL16U *G,*RG,*BG,*GD; | |
| int x,y; | |
| int bayer_pitch = info->width*4; | |
| //float scale = 256.0; | |
| //int matrix_non_unity = 0; | |
| //int wb_non_unity = 0; | |
| //float curve2lin[2048]; | |
| //float lin2curve[2048+512+2]; | |
| #if 0 | |
| static float rgb2yuv[3][4] = | |
| { | |
| {0.183f, 0.614f, 0.062f, 16.0f/256.0f}, | |
| {-0.101f,-0.338f, 0.439f, 0.5f}, | |
| {0.439f,-0.399f,-0.040f, 0.5f} | |
| }; | |
| float mtrx[3][4] = | |
| { | |
| {1.0f, 0, 0, 0}, | |
| {0, 1.0f, 0, 0}, | |
| {0, 0, 1.0f, 0} | |
| }; | |
| float whitebalance[3] = { 1.0f, 1.0f, 1.0f }; | |
| #endif | |
| #if 0 // Matrix disabled as it can only be correctly handled by the 3D LUT due to the required linear conversions | |
| /* if(decoder->cfhddata.MagicNumber == CFHDDATA_MAGIC_NUMBER && decoder->cfhddata.version >= 2) | |
| { | |
| float fval = 0.0; | |
| int i; | |
| for(i=0; i<12; i++) | |
| { | |
| mtrx[i>>2][i&3] = fval = decoder->cfhddata.colormatrix[i>>2][i&3]; | |
| if((i>>2) == (i&3)) | |
| { | |
| if(fval != 1.0) | |
| { | |
| matrix_non_unity = 1; | |
| } | |
| } | |
| else | |
| { | |
| if(fval != 0.0) | |
| { | |
| matrix_non_unity = 1; | |
| } | |
| } | |
| } | |
| // not active as VFW doesn't yet support the 3D LUTs | |
| if(decoder->cfhddata.version >= 5) | |
| { | |
| int j; | |
| float encode_curvebase = 90.0; | |
| float decode_curvebase = 90.0; | |
| int encode_curve_type = decoder->cfhddata.encode_curve >> 16; | |
| int decode_curve_type = decoder->cfhddata.decode_curve >> 16; | |
| if(decoder->cfhddata.user_white_balance[0] > 0.0) | |
| { | |
| wb_non_unity = 1; | |
| whitebalance[0] = decoder->cfhddata.user_white_balance[0]; | |
| whitebalance[1] = (decoder->cfhddata.user_white_balance[1]+decoder->cfhddata.user_white_balance[2])/2.0; | |
| whitebalance[2] = decoder->cfhddata.user_white_balance[3]; | |
| } | |
| if(encode_curve_type) //1 or 2 | |
| encode_curvebase = (float)((decoder->cfhddata.encode_curve >> 8) & 0xff) / (float)(decoder->cfhddata.encode_curve & 0xff); | |
| else | |
| { | |
| encode_curve_type = 1; | |
| encode_curvebase = 90.0; | |
| } | |
| if(decode_curve_type) //1 or 2 | |
| decode_curvebase = (float)((decoder->cfhddata.decode_curve >> 8) & 0xff) / (float)(decoder->cfhddata.decode_curve & 0xff); | |
| else | |
| { | |
| decode_curve_type = 1; | |
| decode_curvebase = 90.0; | |
| } | |
| for(j=0; j<2048; j++) | |
| { | |
| if(encode_curve_type == 1) | |
| curve2lin[j] = CURVE_LOG2LIN((float)j/2047.0,encode_curvebase); | |
| else | |
| curve2lin[j] = CURVE_GAM2LIN((float)j/2047.0,encode_curvebase); | |
| } | |
| for(j=-512; j<=2048; j++) // -1 to +4 | |
| { | |
| if(encode_curve_type == CURVE_TYPE_LOG) | |
| lin2curve[j+512] = CURVE_LIN2LOG((float)j/512.0,encode_curvebase); | |
| else | |
| lin2curve[j+512] = CURVE_LIN2GAM((float)j/512.0,encode_curvebase); | |
| } | |
| } | |
| }*/ | |
| #endif | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, num_channels, | |
| (uint8_t *)decoder->RawBayer16, bayer_pitch*sizeof(PIXEL), | |
| info, chroma_offset, precision); | |
| #else | |
| // Decode that last transform to rows of Bayer data (one row per channel) | |
| TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| decoder->RawBayer16, bayer_pitch*sizeof(PIXEL), info, | |
| &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| if(resolution == DECODED_RESOLUTION_FULL_DEBAYER && | |
| (info->format < DECODED_FORMAT_BYR1 || info->format > DECODED_FORMAT_BYR4)) | |
| { | |
| #if _THREADED //DemosaicRAW | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| #else | |
| assert(0) // old code disabled | |
| /* int bayer_format = decoder->cfhddata.bayer_format; | |
| unsigned char *outA8, *outB8; | |
| unsigned short *lineStartA16, *lineStartB16; | |
| unsigned short *lineA16, *lineB16; | |
| // int stats1=0, stats2=0, statsd=0; | |
| // double dstats1=0, dstats2=0, dstatsd=0; | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| for(y=0; y<info->height+DEMOSAIC_DELAYLINES; y++) | |
| { | |
| bayer_line = decoder->RawBayer16; | |
| bayer_line += bayer_pitch * y; | |
| if(y<info->height) | |
| { | |
| ColorDifference2Bayer(info->width, | |
| bayer_line, bayer_pitch, bayer_format); | |
| } | |
| if(y>=3+DEMOSAIC_DELAYLINES && y<info->height-3+DEMOSAIC_DELAYLINES) //middle scanline | |
| { | |
| unsigned short *delayptr = decoder->RawBayer16; | |
| delayptr += bayer_pitch * (y-DEMOSAIC_DELAYLINES); | |
| BayerRippleFilter(info->width, | |
| delayptr, bayer_pitch, bayer_format, decoder->RawBayer16); | |
| } | |
| if(y>=DEMOSAIC_DELAYLINES) | |
| { | |
| int delay_y = y - DEMOSAIC_DELAYLINES; | |
| unsigned short *sptr, scanline[8192*3]; | |
| outA8 = line; | |
| line += pitch; | |
| outB8 = line; | |
| line += pitch; | |
| sptr = scanline; | |
| DebayerLine(info->width*2, info->height*2, delay_y*2, | |
| decoder->RawBayer16, bayer_format, sptr, sharpening); | |
| for(x=0; x<info->width*2; x++) | |
| { | |
| outA8[2] = *sptr++>>8; | |
| outA8[1] = *sptr++>>8; | |
| outA8[0] = *sptr++>>8; | |
| outA8+=3; | |
| } | |
| for(x=0; x<info->width*2; x++) | |
| { | |
| outB8[2] = *sptr++>>8; | |
| outB8[1] = *sptr++>>8; | |
| outB8[0] = *sptr++>>8; | |
| outB8+=3; | |
| } | |
| } | |
| }*/ | |
| #endif // _THREADED | |
| } | |
| else | |
| if(format == DECODED_FORMAT_BYR2 || format == DECODED_FORMAT_BYR4) | |
| { | |
| #if _THREADED | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| #else | |
| assert(0) // old code disabled | |
| /* { | |
| int bayer_format = decoder->cfhddata.bayer_format; | |
| // int stats1=0, stats2=0, statsd=0; | |
| // double dstats1=0, dstats2=0, dstatsd=0; | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| for(y=0; y<info->height; y++) | |
| { | |
| outA16 = (PIXEL16U *)line; | |
| line += pitch; | |
| outB16 = (PIXEL16U *)line; | |
| line += pitch; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| GD = BG + bayer_pitch/4; | |
| for(x=0; x<info->width; x++) | |
| { | |
| int r,g,b,rg,bg,gd,g1,g2,y1,y2,u,v,dither; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| gd = (*GD++) - 32768; | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| g1 = g + gd; | |
| g2 = g - gd; //TODO: Is there a DC offset to gd (causes a check in output ) | |
| // stats1+=g1; | |
| // stats2+=g2; | |
| // statsd+=gd; | |
| if(r < 0) r = 0; | |
| if(g1 < 0) g1 = 0; | |
| if(g2 < 0) g2 = 0; | |
| if(b < 0) b = 0; | |
| if(r > 0xffff) r = 0xffff; | |
| if(g1 > 0xffff) g1 = 0xffff; | |
| if(g2 > 0xffff) g2 = 0xffff; | |
| if(b > 0xffff) b = 0xffff; | |
| switch(bayer_format) | |
| { | |
| case BAYER_FORMAT_RED_GRN: //Red-grn phase | |
| *outA16++ = r; | |
| *outA16++ = g1; | |
| *outB16++ = g2; | |
| *outB16++ = b; | |
| break; | |
| case BAYER_FORMAT_GRN_RED:// grn-red | |
| *outA16++ = g1; | |
| *outA16++ = r; | |
| *outB16++ = b; | |
| *outB16++ = g2; | |
| break; | |
| case BAYER_FORMAT_GRN_BLU: | |
| *outA16++ = g1; | |
| *outA16++ = b; | |
| *outB16++ = r; | |
| *outB16++ = g2; | |
| break; | |
| case BAYER_FORMAT_BLU_GRN: | |
| *outA16++ = b; | |
| *outA16++ = g1; | |
| *outB16++ = g2; | |
| *outB16++ = r; | |
| break; | |
| } | |
| } | |
| bayer_line += bayer_pitch; | |
| } | |
| if(decoder->flags & DECODER_FLAGS_HIGH_QUALITY) | |
| { | |
| int bayer_format = decoder->cfhddata.bayer_format; | |
| for(y=2; y<info->height-3; y++) | |
| { | |
| int offset = pitch>>1; | |
| line = output; //0 | |
| line += pitch * y * 2; | |
| // If on a red line, move to a blue line | |
| if(bayer_format == BAYER_FORMAT_GRN_RED || bayer_format == BAYER_FORMAT_RED_GRN) | |
| line -= pitch; | |
| { | |
| int offset = pitch>>1; | |
| outA16 = (PIXEL16U *)line; | |
| outA16++; //g //for BAYER_FORMAT_RED_GRN input | |
| outA16++; //b | |
| outA16++; //g | |
| outA16++; //b | |
| //point to green pixel with *outA16 | |
| if(bayer_format == BAYER_FORMAT_GRN_RED || bayer_format == BAYER_FORMAT_GRN_BLU) | |
| outA16++; | |
| for(x=2; x<info->width-2; x++) | |
| { | |
| int mn,mx,g; | |
| int range = 8*256; //1<<11 | |
| int shift = 11; | |
| int delta; | |
| int alpha; | |
| g = *outA16; | |
| // lines below do not need to be tested for a corrected value | |
| mn = mx = outA16[offset+1]; | |
| if(mn > outA16[offset-1]) mn = outA16[offset-1]; | |
| if(mx < outA16[offset-1]) mx = outA16[offset-1]; | |
| if((outA16[-offset-1] & 1)==0) | |
| { | |
| if(mn > outA16[-offset-1]) mn = outA16[-offset-1]; | |
| if(mx < outA16[-offset-1]) mx = outA16[-offset-1]; | |
| } | |
| if((outA16[-offset+1] & 1)==0) | |
| { | |
| if(mn > outA16[-offset+1]) mn = outA16[-offset+1]; | |
| if(mx < outA16[-offset+1]) mx = outA16[-offset+1]; | |
| } | |
| delta = mx - mn; | |
| if(delta < range && ((mn-range < g && g < mn) || (mx+range > g && g > mx))) | |
| { | |
| int gmn,gmx; | |
| gmn = gmx = g; | |
| if((outA16[-2*offset-2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset-2]) gmn = outA16[-2*offset-2]; | |
| if(gmx < outA16[-2*offset-2]) gmx = outA16[-2*offset-2]; | |
| } | |
| if((outA16[-2*offset] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset]) gmn = outA16[-2*offset]; | |
| if(gmx < outA16[-2*offset]) gmx = outA16[-2*offset]; | |
| } | |
| if((outA16[-2*offset+2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset+2]) gmn = outA16[-2*offset+2]; | |
| if(gmx < outA16[-2*offset+2]) gmx = outA16[-2*offset+2]; | |
| } | |
| if((outA16[-2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2]) gmn = outA16[-2]; | |
| if(gmx < outA16[-2]) gmx = outA16[-2]; | |
| } | |
| // lines below do not need to be tested for a corrected value | |
| if(gmn > outA16[2*offset-2]) gmn = outA16[2*offset-2]; | |
| if(gmx < outA16[2*offset-2]) gmx = outA16[2*offset-2]; | |
| if(gmn > outA16[2*offset]) gmn = outA16[2*offset]; | |
| if(gmx < outA16[2*offset]) gmx = outA16[2*offset]; | |
| if(gmn > outA16[2*offset+2]) gmn = outA16[2*offset+2]; | |
| if(gmx < outA16[2*offset+2]) gmx = outA16[2*offset+2]; | |
| if(gmn > outA16[2]) gmn = outA16[2]; | |
| if(gmx < outA16[2]) gmx = outA16[2]; | |
| if((gmx - gmn) < range) | |
| { | |
| alpha = range;//delta; | |
| if(g > mx) | |
| { | |
| alpha *= (g-mx); //max range | |
| alpha >>= shift; | |
| } | |
| else // g < mn | |
| { | |
| alpha *= (mn-g); //max range | |
| alpha >>= shift; | |
| } | |
| alpha *= alpha; | |
| alpha >>= shift; | |
| // avg = (outA16[-offset-1] + outA16[offset-1] + outA16[-offset+1] + outA16[offset+1] + 2) >> 2; | |
| // *outA16 = avg; //good | |
| // *outA16 = mn; //spotty | |
| if( (abs(outA16[offset] - outA16[-offset]) < range) | |
| && ((abs(outA16[1] - outA16[-1]) < range))) | |
| { | |
| int val = (alpha*g + (range - alpha)*((mn+mx)>>1))>>shift; | |
| if(val > 0xffff) val = 0xffff; | |
| if(val < 0) val = 0; | |
| val |= 1; | |
| *outA16 = val; | |
| // *outA16 = ((mn+mx)>>1) | 1; // like avg but less compute | |
| } | |
| } | |
| } | |
| outA16++; //g | |
| outA16++; //b | |
| } | |
| } | |
| } | |
| } | |
| }*/ | |
| #endif | |
| } | |
| // Pack the rows of Bayer data (full resolution progressive) into BYR3 format? | |
| else if (format == DECODED_FORMAT_BYR3) | |
| { | |
| PIXEL16U *outR, *outG1, *outG2, *outB; | |
| // int stats1=0, stats2=0, statsd=0; | |
| // double dstats1=0, dstats2=0, dstatsd=0; | |
| // #pragma omp parallel for | |
| for(y=0; y<info->height; y++) | |
| { | |
| uint8_t *line = output; | |
| PIXEL *bayerptr = (PIXEL *)decoder->RawBayer16; | |
| line += pitch*2*y; | |
| bayerptr += bayer_pitch * y; | |
| outR = (PIXEL16U *)line; | |
| outG1 = outR + (pitch/4); | |
| outG2 = outR + (pitch/4)*2; | |
| outB = outR + (pitch/4)*3; | |
| G = (PIXEL16U *)bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| GD = BG + bayer_pitch/4; | |
| // Pack the rows of Bayer components into the BYR3 pattern | |
| #if (1 && XMMOPT) | |
| { | |
| __m128i *G_128 = (__m128i *)G; | |
| __m128i *RG_128 = (__m128i *)RG; | |
| __m128i *BG_128 = (__m128i *)BG; | |
| __m128i *GD_128 = (__m128i *)GD; | |
| __m128i *outR_128 = (__m128i *)outR; | |
| __m128i *outG1_128 = (__m128i *)outG1; | |
| __m128i *outG2_128 = (__m128i *)outG2; | |
| __m128i *outB_128 = (__m128i *)outB; | |
| __m128i limiter = _mm_set1_epi16(0x7fff - 0x3ff); | |
| __m128i midpoint1 = _mm_set1_epi16(32768>>6); | |
| __m128i midpoint2 = _mm_set1_epi16(32768>>5); | |
| int column_step = 8; | |
| int post_column = (info->width) - ((info->width) % column_step); | |
| for (x=0; x < post_column; x += column_step) | |
| { | |
| __m128i r_128; | |
| __m128i g1_128; | |
| __m128i g2_128; | |
| __m128i b_128; | |
| __m128i g_128; | |
| __m128i rg_128; | |
| __m128i bg_128; | |
| __m128i gd_128; | |
| g_128 = _mm_load_si128(G_128++); | |
| rg_128 = _mm_load_si128(RG_128++); | |
| bg_128 = _mm_load_si128(BG_128++); | |
| gd_128 = _mm_load_si128(GD_128++); | |
| g_128 = _mm_srli_epi16(g_128, 6); | |
| rg_128 = _mm_srli_epi16(rg_128, 5); | |
| bg_128 = _mm_srli_epi16(bg_128, 5); | |
| gd_128 = _mm_srli_epi16(gd_128, 6); | |
| gd_128 = _mm_subs_epi16(gd_128, midpoint1); | |
| rg_128 = _mm_subs_epi16(rg_128, midpoint2); | |
| bg_128 = _mm_subs_epi16(bg_128, midpoint2); | |
| r_128 = _mm_adds_epi16(rg_128, g_128); | |
| b_128 = _mm_adds_epi16(bg_128, g_128); | |
| g1_128 = _mm_adds_epi16(g_128, gd_128); | |
| g2_128 = _mm_subs_epi16(g_128, gd_128); | |
| r_128 = _mm_adds_epi16(r_128, limiter); | |
| r_128 = _mm_subs_epu16(r_128, limiter); | |
| g1_128 = _mm_adds_epi16(g1_128, limiter); | |
| g1_128 = _mm_subs_epu16(g1_128, limiter); | |
| g2_128 = _mm_adds_epi16(g2_128, limiter); | |
| g2_128 = _mm_subs_epu16(g2_128, limiter); | |
| b_128 = _mm_adds_epi16(b_128, limiter); | |
| b_128 = _mm_subs_epu16(b_128, limiter); | |
| _mm_store_si128(outR_128++, r_128); | |
| _mm_store_si128(outG1_128++, g1_128); | |
| _mm_store_si128(outG2_128++, g2_128); | |
| _mm_store_si128(outB_128++, b_128); | |
| } | |
| G = (PIXEL16U *)G_128; | |
| RG = (PIXEL16U *)RG_128; | |
| BG = (PIXEL16U *)BG_128; | |
| GD = (PIXEL16U *)GD_128; | |
| outR = (PIXEL16U *)outR_128; | |
| outG1 = (PIXEL16U *)outG1_128; | |
| outG2 = (PIXEL16U *)outG2_128; | |
| outB = (PIXEL16U *)outB_128; | |
| } | |
| #endif | |
| for(; x<info->width; x++) | |
| { | |
| int r,g,b,rg,bg,gd,g1,g2; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| gd = (*GD++) - 32768; | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| g1 = g + gd; | |
| g2 = g - gd; //TODO: Is there a DC offset to gd (causes a check in output ) | |
| if(r < 0) r = 0; | |
| if(g1 < 0) g1 = 0; | |
| if(g2 < 0) g2 = 0; | |
| if(b < 0) b = 0; | |
| if(r > 0xffff) r = 0xffff; | |
| if(g1 > 0xffff) g1 = 0xffff; | |
| if(g2 > 0xffff) g2 = 0xffff; | |
| if(b > 0xffff) b = 0xffff; | |
| //Red-grn phase | |
| *outR++ = r>>6; | |
| *outG1++ = g1>>6; | |
| *outG2++ = g2>>6; | |
| *outB++ = b>>6; | |
| } | |
| } | |
| } | |
| // Pack the rows of Bayer data (full resolution progressive) into BYR4 format? | |
| else if (format == DECODED_FORMAT_BYR4) | |
| { | |
| int bayer_format = decoder->cfhddata.bayer_format; | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| for(y=0; y<info->height; y++) | |
| { | |
| outA16 = (PIXEL16U *)line; | |
| line += pitch; | |
| outB16 = (PIXEL16U *)line; | |
| line += pitch; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| GD = BG + bayer_pitch/4; | |
| for(x=0; x<info->width; x++) | |
| { | |
| //int r,g,b,rg,bg,gd,g1,g2,y1,y2,u,v,dither; | |
| int32_t r, g, b, rg, bg, gd, g1, g2; | |
| // The output of the inverse transform is unsigned 16-bit integers | |
| const int midpoint = 32768; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| gd = (*GD++) - midpoint; | |
| r = ((rg - midpoint)<<1) + g; | |
| b = ((bg - midpoint)<<1) + g; | |
| g1 = g + gd; | |
| g2 = g - gd; | |
| r = SATURATE_16U(r); | |
| g1 = SATURATE_16U(g1); | |
| g2 = SATURATE_16U(g2); | |
| b = SATURATE_16U(b); | |
| // stats1+=g1; | |
| // stats2+=g2; | |
| // statsd+=gd; | |
| switch(bayer_format) | |
| { | |
| case BAYER_FORMAT_RED_GRN: //Red-grn phase | |
| *outA16++ = r; | |
| *outA16++ = g1; | |
| *outB16++ = g2; | |
| *outB16++ = b; | |
| break; | |
| case BAYER_FORMAT_GRN_RED:// grn-red | |
| *outA16++ = g1; | |
| *outA16++ = r; | |
| *outB16++ = b; | |
| *outB16++ = g2; | |
| break; | |
| case BAYER_FORMAT_GRN_BLU: | |
| *outA16++ = g1; | |
| *outA16++ = b; | |
| *outB16++ = r; | |
| *outB16++ = g2; | |
| break; | |
| case BAYER_FORMAT_BLU_GRN: | |
| *outA16++ = b; | |
| *outA16++ = g1; | |
| *outB16++ = g2; | |
| *outB16++ = r; | |
| break; | |
| default: | |
| // Unsupported Bayer format | |
| assert(0); | |
| *outA16++ = 0; | |
| *outA16++ = 0; | |
| *outB16++ = 0; | |
| *outB16++ = 0; | |
| break; | |
| } | |
| } | |
| bayer_line += bayer_pitch; | |
| } | |
| if(decoder->flags & DECODER_FLAGS_HIGH_QUALITY) | |
| { | |
| for(y=2; y<info->height-3; y++) | |
| { | |
| //int offset = pitch>>1; | |
| line = output; //0 | |
| line += pitch * y * 2; | |
| // If on a red line, move to a blue line | |
| if(bayer_format == BAYER_FORMAT_GRN_RED || bayer_format == BAYER_FORMAT_RED_GRN) | |
| line -= pitch; | |
| { | |
| int offset = pitch>>1; | |
| outA16 = (PIXEL16U *)line; | |
| outA16++; //g //for BAYER_FORMAT_RED_GRN input | |
| outA16++; //b | |
| outA16++; //g | |
| outA16++; //b | |
| //point to green pixel with *outA16 | |
| if(bayer_format == BAYER_FORMAT_GRN_RED || bayer_format == BAYER_FORMAT_GRN_BLU) | |
| outA16++; | |
| for(x=2; x<info->width-2; x++) | |
| { | |
| int mn,mx,g; | |
| int range = 8*256; //1<<11 | |
| int shift = 11; | |
| int delta; | |
| int alpha; | |
| g = *outA16; | |
| // lines below do not need to be tested for a corrected value | |
| mn = mx = outA16[offset+1]; | |
| if(mn > outA16[offset-1]) mn = outA16[offset-1]; | |
| if(mx < outA16[offset-1]) mx = outA16[offset-1]; | |
| if((outA16[-offset-1] & 1)==0) | |
| { | |
| if(mn > outA16[-offset-1]) mn = outA16[-offset-1]; | |
| if(mx < outA16[-offset-1]) mx = outA16[-offset-1]; | |
| } | |
| if((outA16[-offset+1] & 1)==0) | |
| { | |
| if(mn > outA16[-offset+1]) mn = outA16[-offset+1]; | |
| if(mx < outA16[-offset+1]) mx = outA16[-offset+1]; | |
| } | |
| delta = mx - mn; | |
| if(delta < range && ((mn-range < g && g < mn) || (mx+range > g && g > mx))) | |
| { | |
| int gmn,gmx; | |
| gmn = gmx = g; | |
| if((outA16[-2*offset-2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset-2]) gmn = outA16[-2*offset-2]; | |
| if(gmx < outA16[-2*offset-2]) gmx = outA16[-2*offset-2]; | |
| } | |
| if((outA16[-2*offset] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset]) gmn = outA16[-2*offset]; | |
| if(gmx < outA16[-2*offset]) gmx = outA16[-2*offset]; | |
| } | |
| if((outA16[-2*offset+2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2*offset+2]) gmn = outA16[-2*offset+2]; | |
| if(gmx < outA16[-2*offset+2]) gmx = outA16[-2*offset+2]; | |
| } | |
| if((outA16[-2] & 1)==0) | |
| { | |
| if(gmn > outA16[-2]) gmn = outA16[-2]; | |
| if(gmx < outA16[-2]) gmx = outA16[-2]; | |
| } | |
| // lines below do not need to be tested for a corrected value | |
| if(gmn > outA16[2*offset-2]) gmn = outA16[2*offset-2]; | |
| if(gmx < outA16[2*offset-2]) gmx = outA16[2*offset-2]; | |
| if(gmn > outA16[2*offset]) gmn = outA16[2*offset]; | |
| if(gmx < outA16[2*offset]) gmx = outA16[2*offset]; | |
| if(gmn > outA16[2*offset+2]) gmn = outA16[2*offset+2]; | |
| if(gmx < outA16[2*offset+2]) gmx = outA16[2*offset+2]; | |
| if(gmn > outA16[2]) gmn = outA16[2]; | |
| if(gmx < outA16[2]) gmx = outA16[2]; | |
| if((gmx - gmn) < range) | |
| { | |
| alpha = range;//delta; | |
| if(g > mx) | |
| { | |
| alpha *= (g-mx); //max range | |
| alpha >>= shift; | |
| } | |
| else // g < mn | |
| { | |
| alpha *= (mn-g); //max range | |
| alpha >>= shift; | |
| } | |
| alpha *= alpha; | |
| alpha >>= shift; | |
| // avg = (outA16[-offset-1] + outA16[offset-1] + outA16[-offset+1] + outA16[offset+1] + 2) >> 2; | |
| // *outA16 = avg; //good | |
| // *outA16 = mn; //spotty | |
| if( (abs(outA16[offset] - outA16[-offset]) < range) | |
| && ((abs(outA16[1] - outA16[-1]) < range))) | |
| { | |
| int val = (alpha*g + (range - alpha)*((mn+mx)>>1))>>shift; | |
| if(val > 0xffff) val = 0xffff; | |
| if(val < 0) val = 0; | |
| val |= 1; | |
| *outA16 = val; | |
| // *outA16 = ((mn+mx)>>1) | 1; // like avg but less compute | |
| } | |
| } | |
| } | |
| outA16++; //g | |
| outA16++; //b | |
| } | |
| } | |
| } | |
| } | |
| // Linear restore | |
| { | |
| unsigned short *buff = (unsigned short *)output; | |
| //static int pos = 0; | |
| for(y=0; y<info->height*2; y++) | |
| { | |
| for(x=0; x<info->width*2; x++) | |
| { | |
| float val = (float)buff[y*info->width*2 + x]/65535.0f; | |
| float encode_curvebase = 90.0; | |
| int encode_curve_type = CURVE_TYPE_LOG; | |
| int encode_curve_neg; | |
| if((decoder->cfhddata.encode_curve)>>16) //1 or 2 | |
| { | |
| encode_curve_type = (decoder->cfhddata.encode_curve)>>16; | |
| if(encode_curve_type & CURVE_TYPE_EXTENDED) | |
| encode_curvebase = (float)(decoder->cfhddata.encode_curve & 0xffff); // use all 16-bits for larger log bases | |
| else | |
| encode_curvebase = (float)((decoder->cfhddata.encode_curve >> 8) & 0xff) / (float)(decoder->cfhddata.encode_curve & 0xff); | |
| } | |
| if(encode_curvebase == 1.0 && encode_curve_type <= CURVE_TYPE_LINEAR) | |
| encode_curve_type = CURVE_TYPE_LINEAR; | |
| encode_curve_neg = encode_curve_type & CURVE_TYPE_NEGATIVE; | |
| switch(encode_curve_type & CURVE_TYPE_MASK) | |
| { | |
| case CURVE_TYPE_LOG: | |
| val = CURVE_LOG2LIN(val,encode_curvebase); | |
| break; | |
| case CURVE_TYPE_GAMMA: | |
| val = CURVE_GAM2LIN(val,encode_curvebase); | |
| break; | |
| case CURVE_TYPE_CINEON: | |
| val = CURVE_CINEON2LIN(val,encode_curvebase); | |
| break; | |
| case CURVE_TYPE_CINE985: | |
| val = CURVE_CINE9852LIN(val,encode_curvebase); | |
| break; | |
| case CURVE_TYPE_PARA: | |
| val = CURVE_PARA2LIN(val,(int)((decoder->cfhddata.encode_curve >> 8) & 0xff), (int)(decoder->cfhddata.encode_curve & 0xff)); | |
| break; | |
| case CURVE_TYPE_CSTYLE: | |
| val = CURVE_CSTYLE2LIN((float)val,(int)((decoder->cfhddata.encode_curve >> 8) & 0xff)); | |
| break; | |
| case CURVE_TYPE_SLOG: | |
| val = CURVE_SLOG2LIN((float)val); | |
| break; | |
| case CURVE_TYPE_LOGC: | |
| val = CURVE_LOGC2LIN((float)val); | |
| break; | |
| case CURVE_TYPE_LINEAR: | |
| default: | |
| break; | |
| } | |
| buff[y*info->width*2 + x] = (int)(val*4095.0); | |
| } | |
| } | |
| } | |
| } | |
| else | |
| { | |
| #if _THREADED | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| #else | |
| //unsigned short scanline[8192*3],*sptr; | |
| //unsigned short scanline2[8192*3],*sptr2; | |
| unsigned short *scanline,*sptr; | |
| unsigned short *scanline2,*sptr2; | |
| char *buffer = decoder->scratch.free_ptr; | |
| size_t buffer_size = decoder->scratch.free_size; | |
| uint8_t *outyuv,*line = output; | |
| PIXEL *bayerptr; | |
| int x,y; | |
| if(buffer_size < info->width * 2 * 3 * 2) | |
| assert(0); // not enough memory | |
| scanline = (unsigned short *)buffer; | |
| buffer += info->width * 2 * 3; | |
| scanline2 = (unsigned short *)buffer; | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| for(y=0; y<info->height; y++) | |
| { | |
| int r,g,b,rg,bg,y1,y2,u,v; | |
| int r1,g1,b1; | |
| int i; | |
| __m128i gggggggg,ggggggg2,rgrgrgrg,bgbgbgbg; | |
| __m128i rrrrrrrr,bbbbbbbb; | |
| __m128i mid8192 = _mm_set1_epi16(8192); | |
| __m128i mid16384 = _mm_set1_epi16(16384); | |
| __m128i mid32768 = _mm_set1_epi16(32768); | |
| __m128i overflowprotectRGB_epi16 = _mm_set1_epi16(0x7fff-0x3fff); | |
| int sse2width = info->width & 0xfff8; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| GD = BG + bayer_pitch/4; | |
| sptr = scanline; | |
| x = 0; | |
| for(; x<sse2width; x+=8) | |
| { | |
| gggggggg = _mm_loadu_si128((__m128i *)G); G+=8; | |
| rgrgrgrg = _mm_loadu_si128((__m128i *)RG); RG+=8; | |
| bgbgbgbg = _mm_loadu_si128((__m128i *)BG); BG+=8; | |
| ggggggg2 = _mm_srli_epi16(gggggggg, 2);// 0-16383 14bit unsigned | |
| rgrgrgrg = _mm_srli_epi16(rgrgrgrg, 2);// 14bit unsigned | |
| bgbgbgbg = _mm_srli_epi16(bgbgbgbg, 2);// 14bit unsigned | |
| rrrrrrrr = _mm_subs_epi16(rgrgrgrg, mid8192);// -8191 to 8191 14bit signed | |
| rrrrrrrr = _mm_slli_epi16(rrrrrrrr, 1); // -16382 to 16382 15bit signed | |
| rrrrrrrr = _mm_adds_epi16(rrrrrrrr, ggggggg2); // -16382 to 32767 | |
| bbbbbbbb = _mm_subs_epi16(bgbgbgbg, mid8192);// -8191 to 8191 14bit signed | |
| bbbbbbbb = _mm_slli_epi16(bbbbbbbb, 1); // -16382 to 16382 15bit signed | |
| bbbbbbbb = _mm_adds_epi16(bbbbbbbb, ggggggg2); // -16382 to 32767 | |
| //limit to 0 to 16383 | |
| rrrrrrrr = _mm_adds_epi16(rrrrrrrr, overflowprotectRGB_epi16); | |
| rrrrrrrr = _mm_subs_epu16(rrrrrrrr, overflowprotectRGB_epi16); | |
| //limit to 0 to 16383 | |
| bbbbbbbb = _mm_adds_epi16(bbbbbbbb, overflowprotectRGB_epi16); | |
| bbbbbbbb = _mm_subs_epu16(bbbbbbbb, overflowprotectRGB_epi16); | |
| rrrrrrrr = _mm_slli_epi16(rrrrrrrr, 2); // restore to 0 to 65535 | |
| bbbbbbbb = _mm_slli_epi16(bbbbbbbb, 2); // restore to 0 to 65535 | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 0); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 0); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 0); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 1); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 1); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 1); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 2); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 2); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 2); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 3); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 3); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 3); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 4); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 4); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 4); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 5); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 5); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 5); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 6); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 6); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 6); | |
| *sptr++ = _mm_extract_epi16(rrrrrrrr, 7); | |
| *sptr++ = _mm_extract_epi16(gggggggg, 7); | |
| *sptr++ = _mm_extract_epi16(bbbbbbbb, 7); | |
| } | |
| for(; x<info->width; x++) | |
| { | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| if(r < 0) r = 0; if(r > 0xffff) r = 0xffff; | |
| if(g < 0) g = 0; if(g > 0xffff) g = 0xffff; | |
| if(b < 0) b = 0; if(b > 0xffff) b = 0xffff; | |
| *sptr++ = r; | |
| *sptr++ = g; | |
| *sptr++ = b; | |
| } | |
| { | |
| int flags = 0; | |
| int whitebitdepth = 16; | |
| sptr = scanline; | |
| if(decoder->apply_color_active_metadata) | |
| sptr = ApplyActiveMetaData(decoder, info->width, 1, y, scanline, scanline2, | |
| info->format, &whitebitdepth, &flags); | |
| ConvertLinesToOutput(decoder, info->width, 1, sptr, line, pitch, | |
| info->format, whitebitdepth, flags); | |
| } | |
| line += pitch; | |
| bayer_line += bayer_pitch; | |
| } | |
| #endif | |
| } | |
| /* // switch to using the ApplyActiveMetaData() and ConvertLinesToOutput() calls - DAN20071201 | |
| // Pack the rows of Bayer data (full resolution progressive) into BYR2 format? | |
| else if (format == DECODED_FORMAT_YUYV) | |
| { | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| scale = 256.0; | |
| y_rmult = ((rgb2yuv[0][0]) * scale); | |
| y_gmult = ((rgb2yuv[0][1]) * scale); | |
| y_bmult = ((rgb2yuv[0][2]) * scale); | |
| y_offset= ((rgb2yuv[0][3]) * scale); | |
| u_rmult = ((rgb2yuv[1][0]) * scale); | |
| u_gmult = ((rgb2yuv[1][1]) * scale); | |
| u_bmult = ((rgb2yuv[1][2]) * scale); | |
| u_offset= ((rgb2yuv[1][3]) * scale); | |
| v_rmult = ((rgb2yuv[2][0]) * scale); | |
| v_gmult = ((rgb2yuv[2][1]) * scale); | |
| v_bmult = ((rgb2yuv[2][2]) * scale); | |
| v_offset= ((rgb2yuv[2][3]) * scale); | |
| r_rmult= (mtrx[0][0] * scale * whitebalance[0]); | |
| r_gmult= (mtrx[0][1] * scale * whitebalance[1]); | |
| r_bmult= (mtrx[0][2] * scale * whitebalance[2]); | |
| r_offset= (mtrx[0][3] * scale); | |
| g_rmult= (mtrx[1][0] * scale * whitebalance[0]); | |
| g_gmult= (mtrx[1][1] * scale * whitebalance[1]); | |
| g_bmult= (mtrx[1][2] * scale * whitebalance[2]); | |
| g_offset= (mtrx[1][3] * scale); | |
| b_rmult= (mtrx[2][0] * scale * whitebalance[0]); | |
| b_gmult= (mtrx[2][1] * scale * whitebalance[1]); | |
| b_bmult= (mtrx[2][2] * scale * whitebalance[2]); | |
| b_offset= (mtrx[2][3] * scale); | |
| for(y=0; y<info->height; y++) | |
| { | |
| outyuv = line; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| for(x=0; x<info->width; x+=2) | |
| { | |
| int r,g,b,r1,g1,b1,rg,bg,y1,y2,u,v,dither; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| // dither = (rand() & 65535)<<1; | |
| if(matrix_non_unity) | |
| { | |
//TODO: Need to convert to linear first.
| r1= (( r_rmult * r + r_gmult * g + r_bmult * b + r_offset)>>8); | |
| g1= (( g_rmult * r + g_gmult * g + g_bmult * b + g_offset)>>8); | |
| b1= (( b_rmult * r + b_gmult * g + b_bmult * b + b_offset)>>8); | |
//TODO: Need to convert back to log/display curve.
| if(r1 < 0) r1 = 0; | |
| if(r1 > 65535) r1 = 65535; | |
| if(g1 < 0) g1 = 0; | |
| if(g1 > 65535) g1 = 65535; | |
| if(b1 < 0) b1 = 0; | |
| if(b1 > 65535) b1 = 65535; | |
| } | |
| else | |
| { | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| } | |
| y1= ( y_rmult * r1 + y_gmult * g1 + y_bmult * b1 + 32768)>>16; | |
| u = (-u_rmult * r1 - u_gmult * g1 + u_bmult * b1 + 32768)>>16; | |
| v = ( v_rmult * r1 - v_gmult * g1 - v_bmult * b1 + 32768)>>16; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| // dither = (rand() & 65535)<<1; | |
| if(matrix_non_unity) | |
| { | |
//TODO: Need to convert to linear first.
| r1= (( r_rmult * r + r_gmult * g + r_bmult * b + r_offset)>>8); | |
| g1= (( g_rmult * r + g_gmult * g + g_bmult * b + g_offset)>>8); | |
| b1= (( b_rmult * r + b_gmult * g + b_bmult * b + b_offset)>>8); | |
//TODO: Need to convert back to log/display curve.
| if(r1 < 0) r1 = 0; | |
| if(r1 > 65535) r1 = 65535; | |
| if(g1 < 0) g1 = 0; | |
| if(g1 > 65535) g1 = 65535; | |
| if(b1 < 0) b1 = 0; | |
| if(b1 > 65535) b1 = 65535; | |
| } | |
| else | |
| { | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| } | |
| y2 = ( y_rmult * r1 + y_gmult * g1 + y_bmult * b1 + 32768)>>16; | |
| u += (-u_rmult * r1 - u_gmult * g1 + u_bmult * b1 + 32768)>>16; | |
| v += ( v_rmult * r1 - v_gmult * g1 - v_bmult * b1 + 32768)>>16; | |
| u >>= 1; | |
| v >>= 1; | |
| y1 += y_offset; | |
| y2 += y_offset; | |
| u += u_offset; | |
| v += v_offset; | |
| if(y1 < 0) y1 = 0; | |
| if(y1 > 255) y1 = 255; | |
| if(y2 < 0) y2 = 0; | |
| if(y2 > 255) y2 = 255; | |
| if(u < 0) u = 0; | |
| if(u > 255) u = 255; | |
| if(v < 0) v = 0; | |
| if(v > 255) v = 255; | |
| *outyuv++ = y1; | |
| *outyuv++ = u; | |
| *outyuv++ = y2; | |
| *outyuv++ = v; | |
| } | |
| line += pitch; | |
| bayer_line += bayer_pitch; | |
| } | |
| } | |
| else if (format == DECODED_FORMAT_YU64) | |
| { | |
| int shift = 14; | |
| PIXEL16U *outyuv64; | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| scale = 16384.0; | |
| //_mm_empty(); // Clear the mmx register state | |
| y_rmult = ((rgb2yuv[0][0]) * scale); | |
| y_gmult = ((rgb2yuv[0][1]) * scale); | |
| y_bmult = ((rgb2yuv[0][2]) * scale); | |
| y_offset= ((rgb2yuv[0][3]) * scale * 4.0); | |
| u_rmult = ((rgb2yuv[1][0]) * scale); | |
| u_gmult = ((rgb2yuv[1][1]) * scale); | |
| u_bmult = ((rgb2yuv[1][2]) * scale); | |
| u_offset= ((rgb2yuv[1][3]) * scale * 4.0); | |
| v_rmult = ((rgb2yuv[2][0]) * scale); | |
| v_gmult = ((rgb2yuv[2][1]) * scale); | |
| v_bmult = ((rgb2yuv[2][2]) * scale); | |
| v_offset= ((rgb2yuv[2][3]) * scale * 4.0); | |
| scale = 4096.0; | |
| r_rmult= (mtrx[0][0] * scale * whitebalance[0]); | |
| r_gmult= (mtrx[0][1] * scale * whitebalance[1]); | |
| r_bmult= (mtrx[0][2] * scale * whitebalance[2]); | |
| r_offset= (mtrx[0][3] * scale); | |
| g_rmult= (mtrx[1][0] * scale * whitebalance[0]); | |
| g_gmult= (mtrx[1][1] * scale * whitebalance[1]); | |
| g_bmult= (mtrx[1][2] * scale * whitebalance[2]); | |
| g_offset= (mtrx[1][3] * scale); | |
| b_rmult= (mtrx[2][0] * scale * whitebalance[0]); | |
| b_gmult= (mtrx[2][1] * scale * whitebalance[1]); | |
| b_bmult= (mtrx[2][2] * scale * whitebalance[2]); | |
| b_offset= (mtrx[2][3] * scale); | |
| y_offset += 26; | |
| u_offset += 26; | |
| v_offset += 26; | |
| for(y=0; y<info->height; y++) | |
| { | |
| outyuv64 = (PIXEL16U *)line; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| for(x=0; x<info->width; x+=2) | |
| { | |
| int r,g,b,r1,g1,b1,rg,bg,y1,y2,u,v,dither; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| // dither = (rand() & 65535)<<1; | |
| if(matrix_non_unity) | |
| { | |
//TODO: Need to convert to linear first.
| r1= (( r_rmult * r + r_gmult * g + r_bmult * b + r_offset)>>12); | |
| g1= (( g_rmult * r + g_gmult * g + g_bmult * b + g_offset)>>12); | |
| b1= (( b_rmult * r + b_gmult * g + b_bmult * b + b_offset)>>12); | |
//TODO: Need to convert back to log/display curve.
| if(r1 < 0) r1 = 0; | |
| if(r1 > 65535) r1 = 65535; | |
| if(g1 < 0) g1 = 0; | |
| if(g1 > 65535) g1 = 65535; | |
| if(b1 < 0) b1 = 0; | |
| if(b1 > 65535) b1 = 65535; | |
| } | |
| else | |
| { | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| } | |
| y1= (( y_rmult * r1 + y_gmult * g1 + y_bmult * b1)>>shift) + y_offset; | |
| u = (( u_rmult * r1 + u_gmult * g1 + u_bmult * b1)>>shift); | |
| v = (( v_rmult * r1 + v_gmult * g1 + v_bmult * b1)>>shift); | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| // dither = (rand() & 65535)<<1; | |
| if(matrix_non_unity) | |
| { | |
//TODO: Need to convert to linear first.
| r1= (( r_rmult * r + r_gmult * g + r_bmult * b + r_offset)>>12); | |
| g1= (( g_rmult * r + g_gmult * g + g_bmult * b + g_offset)>>12); | |
| b1= (( b_rmult * r + b_gmult * g + b_bmult * b + b_offset)>>12); | |
//TODO: Need to convert back to log/display curve.
| if(r1 < 0) r1 = 0; | |
| if(r1 > 65535) r1 = 65535; | |
| if(g1 < 0) g1 = 0; | |
| if(g1 > 65535) g1 = 65535; | |
| if(b1 < 0) b1 = 0; | |
| if(b1 > 65535) b1 = 65535; | |
| } | |
| else | |
| { | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| } | |
| y2= (( y_rmult * r1 + y_gmult * g1 + y_bmult * b1)>>shift) + y_offset; | |
| u+= (( u_rmult * r1 + u_gmult * g1 + u_bmult * b1)>>shift); | |
| v+= (( v_rmult * r1 + v_gmult * g1 + v_bmult * b1)>>shift); | |
| u >>= 1; | |
| v >>= 1; | |
| u += u_offset; | |
| v += v_offset; | |
| if(y1 < 0) y1 = 0; | |
| if(y1 > 65535) y1 = 65535; | |
| if(y2 < 0) y2 = 0; | |
| if(y2 > 65535) y2 = 65535; | |
| if(u < 0) u = 0; | |
| if(u > 65535) u = 65535; | |
| if(v < 0) v = 0; | |
| if(v > 65535) v = 65535; | |
| *outyuv64++ = y1; | |
| *outyuv64++ = v; | |
| *outyuv64++ = y2; | |
| *outyuv64++ = u; | |
| } | |
| line += pitch; | |
| bayer_line += bayer_pitch; | |
| } | |
| } | |
| else //RGBs | |
| { | |
| line = output; | |
| bayer_line = decoder->RawBayer16; | |
| scale = 256.0; | |
| r_rmult = (mtrx[0][0]) * scale * whitebalance[0]; | |
| r_gmult = (mtrx[0][1]) * scale * whitebalance[1]; | |
| r_bmult = (mtrx[0][2]) * scale * whitebalance[2]; | |
| r_offset= (mtrx[0][3]) * scale; | |
| g_rmult = (mtrx[1][0]) * scale * whitebalance[0]; | |
| g_gmult = (mtrx[1][1]) * scale * whitebalance[1]; | |
| g_bmult = (mtrx[1][2]) * scale * whitebalance[2]; | |
| g_offset= (mtrx[1][3]) * scale; | |
| b_rmult = (mtrx[2][0]) * scale * whitebalance[0]; | |
| b_gmult = (mtrx[2][1]) * scale * whitebalance[1]; | |
| b_bmult = (mtrx[2][2]) * scale * whitebalance[2]; | |
| b_offset= (mtrx[2][3]) * scale; | |
| for(y=0; y<info->height; y++) | |
| { | |
| int i,noisearray[32]; | |
| outyuv = line; | |
| bayerptr = bayer_line; | |
| G = bayerptr; | |
| RG = G + bayer_pitch/4; | |
| BG = RG + bayer_pitch/4; | |
| GD = RG + bayer_pitch/4; | |
| for(i=0; i<32; i++) | |
| { | |
| noisearray[i] = (rand() & 127); | |
| } | |
| if(info->format == DECODED_FORMAT_RGB32) | |
| { | |
| for(x=0; x<info->width; x++) | |
| { | |
| int R1,G1,B1; | |
| int rnd = noisearray[x&31]; | |
| // *ptr++ = *bayerptr++ >> 8; | |
| // *ptr++ = 0x80; | |
| // *ptr++ = *bayerptr++ >> 8; | |
| // *ptr++ = 0x80; | |
| int r,g,b,g1,g2,gdiff,y1,y2,u,v; | |
| // g = (g1+g2)>>1; | |
| // *g_row_ptr++ = g; | |
| // *rg_row_ptr++ = (r-g+256)>>1; | |
| // *bg_row_ptr++ = (b-g+256)>>1; | |
| // *gdiff_row_ptr++ = (g1-g2+256)>>1; | |
| g = ((*G++)>>1); | |
| r = ((*RG++ + 64)>>0)-(256<<7)+g; | |
| b = ((*BG++ + 64)>>0)-(256<<7)+g; | |
| // gdiff = ((*GD++ + 64)>>7)-256+g; | |
| if(matrix_non_unity) | |
| { | |
//TODO: Need to convert to linear first.
| R1 = ((r*r_rmult + g*r_gmult + b*r_bmult + r_offset)>>8) + rnd; | |
| G1 = ((r*g_rmult + g*g_gmult + b*g_bmult + g_offset)>>8) + rnd; | |
| B1 = ((r*b_rmult + g*b_gmult + b*b_bmult + b_offset)>>8) + rnd; | |
//TODO: Need to convert back to log/display curve.
| } | |
| else | |
| { | |
| R1 = r + rnd; | |
| G1 = g + rnd; | |
| B1 = b + rnd; | |
| } | |
| R1 >>= 7; | |
| G1 >>= 7; | |
| B1 >>= 7; | |
| if(R1 < 0) R1 = 0; | |
| if(R1 > 255) R1 = 255; | |
| if(G1 < 0) G1 = 0; | |
| if(G1 > 255) G1 = 255; | |
| if(B1 < 0) B1 = 0; | |
| if(B1 > 255) B1 = 255; | |
| *outyuv++ = B1; | |
| *outyuv++ = G1; | |
| *outyuv++ = R1; | |
| *outyuv++ = 255; | |
| } | |
| } | |
| else | |
| { | |
| for(x=0; x<info->width; x++) | |
| { | |
| int R1,G1,B1; | |
| int rnd = noisearray[x&31]; | |
| // *ptr++ = *bayerptr++ >> 8; | |
| // *ptr++ = 0x80; | |
| // *ptr++ = *bayerptr++ >> 8; | |
| // *ptr++ = 0x80; | |
| int r,g,b,g1,g2,gdiff,y1,y2,u,v; | |
| //g = (g1+g2)>>1; | |
| // *g_row_ptr++ = g; | |
| // *rg_row_ptr++ = (r-g+256)>>1; | |
| // *bg_row_ptr++ = (b-g+256)>>1; | |
| // *gdiff_row_ptr++ = (g1-g2+256)>>1; | |
| g = ((*G++)>>1); | |
| r = ((*RG++ + 64)>>0)-(256<<7)+g; | |
| b = ((*BG++ + 64)>>0)-(256<<7)+g; | |
| // gdiff = ((*GD++ + 64)>>7)-256+g; | |
| if(matrix_non_unity) | |
| { | |
| //TODO: Need to convert to linear first. | |
| R1 = ((r*r_rmult + g*r_gmult + b*r_bmult + r_offset)>>8) + rnd; | |
| G1 = ((r*g_rmult + g*g_gmult + b*g_bmult + g_offset)>>8) + rnd; | |
| B1 = ((r*b_rmult + g*b_gmult + b*b_bmult + b_offset)>>8) + rnd; | |
| //TODO: Need to convert back to log/display curve. | |
| } | |
| else | |
| { | |
| R1 = r + rnd; | |
| G1 = g + rnd; | |
| B1 = b + rnd; | |
| } | |
| R1 >>= 7; | |
| G1 >>= 7; | |
| B1 >>= 7; | |
| if(R1 < 0) R1 = 0; | |
| if(R1 > 255) R1 = 255; | |
| if(G1 < 0) G1 = 0; | |
| if(G1 > 255) G1 = 255; | |
| if(B1 < 0) B1 = 0; | |
| if(B1 > 255) B1 = 255; | |
| *outyuv++ = B1; | |
| *outyuv++ = G1; | |
| *outyuv++ = R1; | |
| } | |
| } | |
| line += pitch; | |
| bayer_line += bayer_pitch; | |
| } | |
| } | |
| */ | |
| //MEMORY_ALIGNED_FREE(RawBayer16); | |
| } | |
| } | |
| else | |
| if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| int precision = codec->precision; | |
| if(decoder->RawBayer16 == NULL) | |
| { | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| size_t size = info->width*info->height*num_channels*sizeof(PIXEL); | |
| decoder->RawBayer16 = | |
| (PIXEL16U *)AllocAligned(allocator, size, 16); | |
| #else | |
| decoder->RawBayer16 = | |
| (PIXEL16U *)MEMORY_ALIGNED_ALLOC(info->width*info->height*num_channels*sizeof(PIXEL), 16); | |
| #endif | |
| decoder->RawBayerSize = info->width*info->height*num_channels*sizeof(PIXEL); | |
| } | |
| //#ifdef SHARPENING | |
| if(decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| int frame_size = info->width*decoded_height*4*3*sizeof(PIXEL); | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| frame_size = info->width*decoded_height*4*4*sizeof(PIXEL); | |
| #if _ALLOCATOR | |
| { | |
| ALLOCATOR *allocator = decoder->allocator; | |
| decoder->RGBFilterBuffer16 = | |
| (PIXEL16U *)AllocAligned(allocator, frame_size, 16); | |
| } | |
| #else | |
| decoder->RGBFilterBuffer16 = | |
| (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, 16); | |
| #endif | |
| decoder->RGBFilterBufferSize = frame_size; | |
| } | |
| //#endif | |
| if(decoder->RawBayer16 == NULL || decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| decoder->error = CODEC_ERROR_MEMORY_ALLOC; | |
| return; | |
| } | |
| //TODO: Replace this memory allocation with a scratch buffer allocation | |
| if(decoder->RawBayer16) | |
| { | |
| uint8_t *outyuv,*line, *source_line; | |
| PIXEL16U *bayerptr; | |
| PIXEL16U *G,*RG,*BG; | |
| int x,y; | |
| int src_pitch = info->width*num_channels*sizeof(PIXEL); | |
| int y_rmult,y_gmult,y_bmult,y_offset;//shift=8; | |
| int u_rmult,u_gmult,u_bmult,u_offset; | |
| int v_rmult,v_gmult,v_bmult,v_offset; | |
| float scale = 256.0; | |
| //int matrix_non_unity = 0; | |
| //int wb_non_unity = 0; | |
| //float curve2lin[2048]; | |
| //float lin2curve[2048+512+2]; | |
| static float rgb2yuv[3][4] = | |
| { | |
| {0.183f, 0.614f, 0.062f, 16.0f/256.0f}, | |
| {-0.101f,-0.338f, 0.439f, 0.5f}, | |
| {0.439f,-0.399f,-0.040f, 0.5} | |
| }; | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, num_channels, | |
| (uint8_t *)decoder->RawBayer16, src_pitch, | |
| info, chroma_offset, precision); | |
| #else | |
| TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| decoder->RawBayer16, src_pitch, info, | |
| &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| if (format == DECODED_FORMAT_YUYV) | |
| { | |
| line = output; | |
| source_line = (unsigned char *)decoder->RawBayer16; | |
| scale = 256.0; | |
| y_rmult = (int)((rgb2yuv[0][0])); | |
| y_gmult = (int)((rgb2yuv[0][1])); | |
| y_bmult = (int)((rgb2yuv[0][2])); | |
| y_offset= (int)((rgb2yuv[0][3])); | |
| u_rmult = (int)((rgb2yuv[1][0])); | |
| u_gmult = (int)((rgb2yuv[1][1])); | |
| u_bmult = (int)((rgb2yuv[1][2])); | |
| u_offset= (int)((rgb2yuv[1][3])); | |
| v_rmult = (int)((rgb2yuv[2][0])); | |
| v_gmult = (int)((rgb2yuv[2][1])); | |
| v_bmult = (int)((rgb2yuv[2][2])); | |
| v_offset= (int)((rgb2yuv[2][3])); | |
| for(y=0; y<info->height; y++) | |
| { | |
| outyuv = line; | |
| bayerptr = (PIXEL16U *)source_line; | |
| G = bayerptr; | |
| RG = G + src_pitch/(2*num_channels); | |
| BG = RG + src_pitch/(2*num_channels); | |
| for(x=0; x<info->width; x+=2) | |
| { | |
| int r,g,b,r1,g1,b1,rg,bg,y1,y2,u,v; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| y1= ( y_rmult * r1 + y_gmult * g1 + y_bmult * b1 + 32768)>>16; | |
| u = (-u_rmult * r1 - u_gmult * g1 + u_bmult * b1 + 32768)>>16; | |
| v = ( v_rmult * r1 - v_gmult * g1 - v_bmult * b1 + 32768)>>16; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| y2 = ( y_rmult * r1 + y_gmult * g1 + y_bmult * b1 + 32768)>>16; | |
| u += (-u_rmult * r1 - u_gmult * g1 + u_bmult * b1 + 32768)>>16; | |
| v += ( v_rmult * r1 - v_gmult * g1 - v_bmult * b1 + 32768)>>16; | |
| u >>= 1; | |
| v >>= 1; | |
| y1 += y_offset; | |
| y2 += y_offset; | |
| u += u_offset; | |
| v += v_offset; | |
| if(y1 < 0) y1 = 0; | |
| if(y1 > 255) y1 = 255; | |
| if(y2 < 0) y2 = 0; | |
| if(y2 > 255) y2 = 255; | |
| if(u < 0) u = 0; | |
| if(u > 255) u = 255; | |
| if(v < 0) v = 0; | |
| if(v > 255) v = 255; | |
| *outyuv++ = y1; | |
| *outyuv++ = u; | |
| *outyuv++ = y2; | |
| *outyuv++ = v; | |
| } | |
| line += pitch; | |
| source_line += src_pitch; | |
| } | |
| } | |
| else if (format == DECODED_FORMAT_YU64) | |
| { | |
| int shift = 14; | |
| PIXEL16U *outyuv64; | |
| line = output; | |
| source_line = (unsigned char *)decoder->RawBayer16; | |
| scale = 16384.0; | |
| y_rmult = (int)((rgb2yuv[0][0]) * scale); | |
| y_gmult = (int)((rgb2yuv[0][1]) * scale); | |
| y_bmult = (int)((rgb2yuv[0][2]) * scale); | |
| y_offset= (int)((rgb2yuv[0][3]) * scale * 4.0f); | |
| u_rmult = (int)((rgb2yuv[1][0]) * scale); | |
| u_gmult = (int)((rgb2yuv[1][1]) * scale); | |
| u_bmult = (int)((rgb2yuv[1][2]) * scale); | |
| u_offset= (int)((rgb2yuv[1][3]) * scale * 4.0f); | |
| v_rmult = (int)((rgb2yuv[2][0]) * scale); | |
| v_gmult = (int)((rgb2yuv[2][1]) * scale); | |
| v_bmult = (int)((rgb2yuv[2][2]) * scale); | |
| v_offset= (int)((rgb2yuv[2][3]) * scale * 4.0f); | |
| scale = 4096.0; | |
| y_offset += 26; | |
| u_offset += 26; | |
| v_offset += 26; | |
| for(y=0; y<info->height; y++) | |
| { | |
| outyuv64 = (PIXEL16U *)line; | |
| bayerptr = (PIXEL16U *)source_line; | |
| G = bayerptr; | |
| RG = G + src_pitch/(2*num_channels); | |
| BG = RG + src_pitch/(2*num_channels); | |
| for(x=0; x<info->width; x+=2) | |
| { | |
| int r,g,b,r1,g1,b1,rg,bg,y1,y2,u,v; | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| y1= (( y_rmult * r1 + y_gmult * g1 + y_bmult * b1)>>shift) + y_offset; | |
| u = (( u_rmult * r1 + u_gmult * g1 + u_bmult * b1)>>shift); | |
| v = (( v_rmult * r1 + v_gmult * g1 + v_bmult * b1)>>shift); | |
| g = (*G++); | |
| rg = (*RG++); | |
| bg = (*BG++); | |
| r = ((rg - 32768)<<1) + g; | |
| b = ((bg - 32768)<<1) + g; | |
| r1 = r; | |
| g1 = g; | |
| b1 = b; | |
| y2= (( y_rmult * r1 + y_gmult * g1 + y_bmult * b1)>>shift) + y_offset; | |
| u+= (( u_rmult * r1 + u_gmult * g1 + u_bmult * b1)>>shift); | |
| v+= (( v_rmult * r1 + v_gmult * g1 + v_bmult * b1)>>shift); | |
| u >>= 1; | |
| v >>= 1; | |
| u += u_offset; | |
| v += v_offset; | |
| if(y1 < 0) y1 = 0; | |
| if(y1 > 65535) y1 = 65535; | |
| if(y2 < 0) y2 = 0; | |
| if(y2 > 65535) y2 = 65535; | |
| if(u < 0) u = 0; | |
| if(u > 65535) u = 65535; | |
| if(v < 0) v = 0; | |
| if(v > 65535) v = 65535; | |
| *outyuv64++ = y1; | |
| *outyuv64++ = v; | |
| *outyuv64++ = y2; | |
| *outyuv64++ = u; | |
| } | |
| line += pitch; | |
| source_line += src_pitch; | |
| } | |
| } | |
| else //RGBs | |
| { | |
| line = output; | |
| source_line = (unsigned char *)decoder->RawBayer16; | |
| for(y=0; y<info->height; y++) | |
| { | |
| int i,noisearray[32]; | |
| unsigned short *rgb16 = (unsigned short *)line; | |
| outyuv = line; | |
| bayerptr = (PIXEL16U *)source_line; | |
| G = bayerptr; | |
| RG = G + src_pitch/(2*num_channels); | |
| BG = RG + src_pitch/(2*num_channels); | |
| for(i=0; i<32; i++) | |
| { | |
| noisearray[i] = (rand() & 255); | |
| } | |
| if(info->format == DECODED_FORMAT_RGB32) | |
| { | |
| for(x=0; x<info->width; x++) | |
| { | |
| int R1,G1,B1; | |
| int rnd = noisearray[x&31]; | |
| #if 0 | |
| G1 = (*G++) + rnd; | |
| R1 = ((*RG++<<1) - (128<<9)) + G1; | |
| B1 = ((*BG++<<1) - (128<<9)) + G1; | |
| #else | |
| G1 = (*G++) + rnd; | |
| R1 = (*RG++) + rnd; | |
| B1 = (*BG++) + rnd; | |
| #endif | |
| R1 >>= 8; | |
| G1 >>= 8; | |
| B1 >>= 8; | |
| if(R1 < 0) R1 = 0; | |
| if(R1 > 255) R1 = 255; | |
| if(G1 < 0) G1 = 0; | |
| if(G1 > 255) G1 = 255; | |
| if(B1 < 0) B1 = 0; | |
| if(B1 > 255) B1 = 255; | |
| *outyuv++ = B1; | |
| *outyuv++ = G1; | |
| *outyuv++ = R1; | |
| *outyuv++ = 255; | |
| } | |
| } | |
| else if(info->format == DECODED_FORMAT_RGB24) | |
| { | |
| for(x=0; x<info->width; x++) | |
| { | |
| int R1,G1,B1; | |
| int rnd = noisearray[x&31]; | |
| #if 0 | |
| G1 = (*G++) + rnd; | |
| R1 = ((*RG++<<1) - (128<<9)) + G1; | |
| B1 = ((*BG++<<1) - (128<<9)) + G1; | |
| #else | |
| G1 = (*G++) + rnd; | |
| R1 = (*RG++) + rnd; | |
| B1 = (*BG++) + rnd; | |
| #endif | |
| R1 >>= 8; | |
| G1 >>= 8; | |
| B1 >>= 8; | |
| if(R1 < 0) R1 = 0; | |
| if(R1 > 255) R1 = 255; | |
| if(G1 < 0) G1 = 0; | |
| if(G1 > 255) G1 = 255; | |
| if(B1 < 0) B1 = 0; | |
| if(B1 > 255) B1 = 255; | |
| *outyuv++ = B1; | |
| *outyuv++ = G1; | |
| *outyuv++ = R1; | |
| } | |
| } | |
| else if(info->format == DECODED_FORMAT_RG48) | |
| { | |
| for(x=0; x<info->width; x++) | |
| { | |
| int R1,G1,B1; | |
| G1 = (*G++); | |
| R1 = (*RG++); | |
| B1 = (*BG++); | |
| *rgb16++ = R1; | |
| *rgb16++ = G1; | |
| *rgb16++ = B1; | |
| } | |
| } | |
| line += pitch; | |
| source_line += src_pitch; | |
| } | |
| } | |
| //MEMORY_ALIGNED_FREE(RawBayer16); | |
| } | |
| } | |
| else // Output the frame in one of the RGB 8-bit formats | |
| { | |
| //char *buffer = decoder->buffer; | |
| //size_t buffer_size = decoder->buffer_size; | |
| // Invert the bottom wavelet and convert the output to the requested color format | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sYUVtoRGB); | |
| #else | |
| TransformInverseSpatialToBuffer(decoder, transform_array, frame, num_channels, output, pitch, | |
| &info2, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| } | |
| } | |
| } | |
| #if TIMING | |
| // Count the number of progressive frames that were decoded | |
| progressive_decode_count++; | |
| #endif | |
| } | |
| STOP(tk_inverse); | |
| #ifdef ADOBE_MEMORY_FUNCTIONS | |
| if((decoder->RawBayer16 && decoder->RawBayerSize > 2048*1152*2) || | |
| (decoder->RGBFilterBuffer16 && decoder->RGBFilterBufferSize > 2048*1152*2)) | |
| { | |
| #if _ALLOCATOR | |
| if(decoder->RawBayer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RawBayer16); | |
| decoder->RawBayer16 = NULL; | |
| decoder->RawBayerSize = NULL; | |
| } | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| decoder->RGBFilterBufferSize = NULL; | |
| } | |
| #else | |
| if(decoder->RawBayer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RawBayer16); | |
| decoder->RawBayer16 = NULL; | |
| decoder->RawBayerSize = NULL; | |
| } | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| decoder->RGBFilterBufferSize = NULL; | |
| } | |
| #endif | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| //uint8_t *subimage = output; | |
| uint8_t *subimage = output + (2 * info->width) - 16; | |
| DumpArray8u("YUV Image", subimage, 16, 16, pitch, logfile); | |
| } | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Exit ReconstructFrameToBuffer\n"); | |
| } | |
| #endif | |
| #if (0 && DEBUG && _WIN32) | |
| _CrtCheckMemory(); | |
| #endif | |
| } | |
| #if 0 | |
// Reconstruct the frame to quarter resolution at full frame rate.
//
// NOTE(review): This is the legacy implementation, compiled out by the
// enclosing "#if 0" and superseded by the ReconstructQuarterFrame() in the
// #else branch below. As written it would not compile if re-enabled (see
// the NOTE(review) comments in the body); it is kept only as a reference
// for what the replacement was derived from.
void ReconstructQuarterFrame(DECODER *decoder, int num_channels,
                             uint8_t *frame1, uint8_t *frame2, int output_pitch,
                             FRAME_INFO *info, char *buffer, size_t buffer_size)
{
    TRANSFORM **transform_array = decoder->transform;
    int output_width = info->width;
    int output_height = info->height;

    // Per-channel row pointers into the source wavelet bands and output frames
    PIXEL *low_row_ptr[CODEC_MAX_CHANNELS];
    PIXEL *high_row_ptr[CODEC_MAX_CHANNELS];
    PIXEL *out1_row_ptr[CODEC_MAX_CHANNELS];   // NOTE(review): never used below
    PIXEL *out2_row_ptr[CODEC_MAX_CHANNELS];   // NOTE(review): never used below

    // Carves per-channel scratch rows out of the caller-supplied buffer
    PIXEL *bufptr = (PIXEL *)buffer;

    uint8_t *output_row_ptr = output;   // NOTE(review): 'output' is not declared in
                                        // this version (the parameters are frame1 and
                                        // frame2) -- one reason this code is disabled

    int low_pitch[CODEC_MAX_CHANNELS];
    int high_pitch[CODEC_MAX_CHANNELS];
    int channel;
    int row;

    // Check that there is enough space for the intermediate results from each channel
    assert(output_width * sizeof(PIXEL) < buffer_size);

    // Get pointers into the wavelets for each channel
    for (channel = 0; channel < num_channels; channel++)
    {
        // Get the lowpass bands from the two wavelets for the two halves of the temporal wavelet
        IMAGE *low_wavelet = transform_array[channel]->wavelet[3];
        IMAGE *high_wavelet = transform_array[channel]->wavelet[2];

        // Get the pointers to the first row in each lowpass band
        low_row_ptr[channel] = low_wavelet->band[0];
        high_row_ptr[channel] = high_wavelet->band[0];
        low_pitch[channel] = low_wavelet->pitch / sizeof(PIXEL);
        high_pitch[channel] = high_wavelet->pitch / sizeof(PIXEL);

        // Allocate space for one row of results for this channel
        channel_row_ptr[channel] = bufptr;   // NOTE(review): channel_row_ptr is never
                                             // declared in this version (the #else
                                             // version declares it locally)
        bufptr += low_wavelet->width;
    }

    for (row = 0; row < output_height; row++)
    {
        char *bufptr = buffer;   // NOTE(review): shadows the outer bufptr and is unused

        for (channel = 0; channel < num_channels; channel++)
        {
            // Invert the temporal transform at quarter resolution
            InvertTemporalQuarterRow16s(low_row_ptr[channel], high_row_ptr[channel], channel_row_ptr[channel]);

            // Advance to the next row in each band for the temporal transform
            low_row_ptr[channel] += low_pitch[channel];
            high_row_ptr[channel] += high_pitch[channel];
        }

        // Pack the intermediate results into the output row
        ConvertUnpacked16sRowToPacked8u(channel_row_ptr, num_channels, output_row_ptr, output_width);

        // Advance the output row pointer
        output_row_ptr += output_pitch;
    }
}
| #else | |
| // Reconstruct the frame to quarter resolution at full frame rate | |
| void ReconstructQuarterFrame(DECODER *decoder, int num_channels, | |
| int frame_index, uint8_t *output, int output_pitch, | |
| FRAME_INFO *info, const SCRATCH *scratch, int precision) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| TRANSFORM **transform_array = decoder->transform; | |
| int output_width = info->width; | |
| int output_height = info->height; | |
| PIXEL *low_row_ptr[CODEC_MAX_CHANNELS]; | |
| PIXEL *high_row_ptr[CODEC_MAX_CHANNELS]; | |
| uint8_t *output_row_ptr = output; | |
| int low_pitch[CODEC_MAX_CHANNELS]; | |
| int high_pitch[CODEC_MAX_CHANNELS]; | |
| int channel; | |
| int row; | |
| // Value used for filling the fourth channel in ARGB output | |
| int alpha = 255; | |
| int format = COLORFORMAT(info); | |
| int color_space = COLORSPACE(info); | |
| int decoded_format = DECODEDFORMAT(info); | |
| //bool inverted = false; | |
| // The pixels are descaled in the inverse temporal transform | |
| //const int descale = 0; | |
| // Shift the intermediate results to 16-bit pixels | |
| const int shift_yu64 = 8; | |
| // Push the scratch space state to allocate a new section | |
| char *buffer = scratch->free_ptr; | |
| #if DEBUG | |
| size_t buffer_size = scratch->free_size; | |
| #endif | |
| // Initialize a pointer for allocating space in the buffer | |
| PIXEL *bufptr = (PIXEL *)buffer; | |
| // Array of pointers to the start of each channel in the intermediate results | |
| PIXEL *channel_row_ptr[CODEC_MAX_CHANNELS]; | |
| // Check that there is enough space for the intermediate results from each channel | |
| #if DEBUG | |
| assert(output_width * sizeof(PIXEL) < buffer_size); | |
| #endif | |
| ComputeCube(decoder); | |
| // Get pointers into the wavelets for each channel | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| // Get the lowpass bands from the two wavelets for the two halves of the temporal wavelet | |
| IMAGE *low_wavelet = transform_array[channel]->wavelet[4]; | |
| IMAGE *high_wavelet = transform_array[channel]->wavelet[3]; | |
| // Get the pointers to the first row in each lowpass band | |
| low_row_ptr[channel] = low_wavelet->band[0]; | |
| high_row_ptr[channel] = high_wavelet->band[0]; | |
| low_pitch[channel] = low_wavelet->pitch / sizeof(PIXEL); | |
| high_pitch[channel] = high_wavelet->pitch / sizeof(PIXEL); | |
| // Force the row of intermediate results to be properly aligned | |
| bufptr = (PIXEL *)ALIGN16(bufptr); | |
| // Allocate space for one row of results for this channel | |
| channel_row_ptr[channel] = bufptr; | |
| bufptr += low_wavelet->width; | |
| // Check that the row of intermediate results is properly aligned | |
| assert(ISALIGNED16(channel_row_ptr[channel])); | |
| } | |
| // Invert the image if required | |
| switch (decoded_format) | |
| { | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_RGB32: | |
| output_row_ptr += (output_height - 1) * output_pitch; | |
| output_pitch = NEG(output_pitch); | |
| } | |
| //HACK: Seems to work, I don't know why. //DAN20070304 | |
| if (precision == 12) precision = 8; | |
| // Apply the inverse temporal transform to the lowpass and highpass rows | |
| for (row = 0; row < output_height; row++) | |
| { | |
| // Most of the color conversion routines use zero descaling | |
| int descale = 0; | |
| //char *bufptr = buffer; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| if (frame_index == 0) | |
| { | |
| // Invert the temporal transform at quarter resolution to get the even row | |
| InvertTemporalQuarterEvenRow16s(low_row_ptr[channel], high_row_ptr[channel], | |
| channel_row_ptr[channel], output_width, precision); | |
| } | |
| else | |
| { | |
| assert(frame_index == 1); | |
| // Invert the temporal transform at quarter resolution to get the odd row | |
| InvertTemporalQuarterOddRow16s(low_row_ptr[channel], high_row_ptr[channel], | |
| channel_row_ptr[channel], output_width, precision); | |
| } | |
| // Advance to the next row in each band for the temporal transform | |
| low_row_ptr[channel] += low_pitch[channel]; | |
| high_row_ptr[channel] += high_pitch[channel]; | |
| } | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| uint8_t *channeldata[TRANSFORM_MAX_CHANNELS]; // used in quarter res decodes | |
| int channelpitch[TRANSFORM_MAX_CHANNELS]; // used in quarter res decodes | |
| int i; | |
| FRAME_INFO info2; | |
| memcpy(&info2, info, sizeof(FRAME_INFO)); | |
| info2.height = 1; | |
| for(i=0;i<num_channels;i++) | |
| { | |
| channeldata[i] = (uint8_t *)channel_row_ptr[i]; | |
| channelpitch[i] = 0; | |
| } | |
| #if 1 | |
| { | |
| __m128i *Y = (__m128i *)channeldata[0]; | |
| __m128i *U = (__m128i *)channeldata[1]; | |
| __m128i *V = (__m128i *)channeldata[2]; | |
| __m128i v; | |
| int x; | |
| __m128i rgb_limit_epi16 = _mm_set1_epi16(0x7fff - 0x0fff); | |
| for(x=0;x<info->width;x+=8) | |
| { | |
| v = _mm_load_si128(Y); | |
| v = _mm_adds_epi16(v, rgb_limit_epi16); | |
| v = _mm_subs_epu16(v, rgb_limit_epi16); | |
| v = _mm_slli_epi16(v, 4); | |
| _mm_store_si128(Y++, v); | |
| } | |
| for(x=0;x<info->width/2;x+=8) | |
| { | |
| v = _mm_load_si128(U); | |
| v = _mm_adds_epi16(v, rgb_limit_epi16); | |
| v = _mm_subs_epu16(v, rgb_limit_epi16); | |
| v = _mm_slli_epi16(v, 4); | |
| _mm_store_si128(U++, v); | |
| } | |
| for(x=0;x<info->width/2;x+=8) | |
| { | |
| v = _mm_load_si128(V); | |
| v = _mm_adds_epi16(v, rgb_limit_epi16); | |
| v = _mm_subs_epu16(v, rgb_limit_epi16); | |
| v = _mm_slli_epi16(v, 4); | |
| _mm_store_si128(V++, v); | |
| } | |
| } | |
| #else | |
| //non SSE2 | |
| for(x=0;x<info->width*2;x++) | |
| { | |
| int val = *gptr++; | |
| if(val < 0) val = 0; | |
| if(val > 4095) val = 4095; | |
| val <<= 4; | |
| *src++ = val; | |
| } | |
| src = scanline2; | |
| #endif | |
| Row16uQuarter2OutputFormat(decoder, &info2, 0, output_row_ptr, output_pitch, | |
| decoder->gop_frame_num/*0 frame*/, scratch->free_ptr, scratch->free_size, false, channeldata, channelpitch); | |
| } | |
| else | |
| { | |
| //DAN20081203 -- fix for 444 decodes in AE32-bit float | |
| decoder->frame.white_point = 16; | |
| //decoder->frame.signed_pixels = 0; | |
| // Convert the rows of luma and chroma into the output format | |
| switch(format) | |
| { | |
| case COLOR_FORMAT_YUYV: | |
| case COLOR_FORMAT_UYVY: | |
| // Pack the intermediate results into the output row | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER) | |
| { | |
| assert(0);//need quarter res BAYER To YUV decoder | |
| } | |
| else if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| // assert(0);//need quarter res RGB To YUV decoder | |
| ConvertRGB2YUV( channel_row_ptr[1], channel_row_ptr[0], channel_row_ptr[2], | |
| output_width, output_width, output_width, | |
| output_row_ptr, output_pitch, | |
| info->width, 1, 10, info->colorspace, format); | |
| } | |
| else | |
| { | |
| ConvertUnpacked16sRowToPacked8u(channel_row_ptr, num_channels, output_row_ptr, output_width, format); | |
| } | |
| break; | |
| case COLOR_FORMAT_RGB24: | |
| if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| ConvertRGB48toRGB24( channel_row_ptr[1], channel_row_ptr[0], channel_row_ptr[2], | |
| output_width, output_width, output_width, | |
| output_row_ptr, output_pitch, | |
| info->width, 1, 10, 0); | |
| } | |
| else | |
| { | |
| // Convert the intermediate results into a row of RGB24 | |
| ConvertUnpacked16sRowToRGB24(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, format, color_space); | |
| } | |
| break; | |
| case COLOR_FORMAT_RGB32: | |
| if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| ConvertRGBA48toRGB32(channel_row_ptr[1], channel_row_ptr[0], channel_row_ptr[2], NULL, | |
| output_width, | |
| output_row_ptr, output_pitch, | |
| info->width, 1, 10, 0, 3/*only 3 chhanel not 4 for alpha*/); | |
| } | |
| else | |
| { | |
| // Convert the intermediate results into a row of RGBA32 | |
| ConvertUnpacked16sRowToRGB32(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, format, color_space, alpha); | |
| } | |
| break; | |
| case COLOR_FORMAT_YU64: | |
| case COLOR_FORMAT_V210: | |
| // Convert the intermediate results into a row of YU64 | |
| ConvertUnpacked16sRowToYU64(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| shift_yu64, precision, format); | |
| break; | |
| case COLOR_FORMAT_B64A: | |
| if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| // Convert the intermediate results into a row of RGBA with 16 bits per component | |
| descale = 2; | |
| ConvertUnpacked16sRowToB64A(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision); | |
| } | |
| else | |
| { | |
| ConvertUnpackedYUV16sRowToRGB48(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision, COLOR_FORMAT_B64A, color_space); | |
| } | |
| break; | |
| case COLOR_FORMAT_R210: | |
| case COLOR_FORMAT_DPX0: | |
| case COLOR_FORMAT_RG30: | |
| case COLOR_FORMAT_AR10: | |
| case COLOR_FORMAT_AB10: | |
| if((decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) || | |
| (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444)) | |
| { | |
| // Convert the intermediate results into a row of RGBA with 16 bits per component | |
| descale = 2; | |
| ConvertUnpacked16sRowToRGB30(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision, format, color_space); | |
| } | |
| else | |
| { | |
| ConvertUnpackedYUV16sRowToRGB48(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision, format, color_space); | |
| } | |
| break; | |
| case COLOR_FORMAT_RG48: | |
| // Convert the intermediate results into a row of RGBA with 16 bits per component | |
| descale = 2; | |
| ConvertUnpacked16sRowToRGB48(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision); | |
| break; | |
| case COLOR_FORMAT_RG64: | |
| // Convert the intermediate results into a row of RGBA with 16 bits per component | |
| descale = 2; | |
| ConvertUnpacked16sRowToRGBA64(channel_row_ptr, num_channels, output_row_ptr, output_width, | |
| descale, precision); | |
| break; | |
| default: | |
| #if (1 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "ReconstructQuarterFrame bad color format: %d\n", format); | |
| } | |
| #endif | |
| assert(0); | |
| break; | |
| } | |
| } | |
| // Advance the output row pointer | |
| output_row_ptr += output_pitch; | |
| } | |
| } | |
| #endif | |
| #if 0 | |
// Copy the quarter resolution lowpass channels from the spatial transform
// into the output buffer, one packed row at a time.
// NOTE(review): this routine is compiled out (#if 0).  It calls
// CopyQuarterRowToBuffer with five arguments, while the live path in
// ConvertQuarterFrameToBuffer passes a sixth (format) argument — update
// the call before re-enabling this function.
void CopyQuarterFrameToBuffer(TRANSFORM **transform_array, int num_channels,
                              uint8_t *output, int output_pitch,
                              FRAME_INFO *info, int precision)
{
    int output_width = info->width;
    int output_height = info->height;
    PIXEL *input_row_ptr[CODEC_MAX_CHANNELS];
    uint8_t *output_row_ptr = output;
    int input_pitch[CODEC_MAX_CHANNELS];
    int channel;
    int row;

    // Get pointers into the wavelets for each channel
    for (channel = 0; channel < num_channels; channel++)
    {
        // Get the lowpass bands from the two wavelets for the two halves of the temporal wavelet
        IMAGE *wavelet = transform_array[channel]->wavelet[1];

        // Get the pointers to the first row in each lowpass band
        input_row_ptr[channel] = wavelet->band[0];
        // Convert the pitch from bytes to pixels
        input_pitch[channel] = wavelet->pitch / sizeof(PIXEL);
    }

    for (row = 0; row < output_height; row++)
    {
        // Descale and pack the pixels in each output row
        CopyQuarterRowToBuffer(input_row_ptr, num_channels, output_row_ptr, output_width, precision);

        // Advance the input row pointers
        for (channel = 0; channel < num_channels; channel++) {
            input_row_ptr[channel] += input_pitch[channel];
        }

        // Advance the output row pointer
        output_row_ptr += output_pitch;
    }
}
| #endif | |
// Convert the quarter resolution lowpass channels to the specified output format
//
// Reads the lowpass band of wavelet[1] in each channel transform and emits
// output rows of info->width pixels in the color format carried by the frame
// info.  When the active-metadata decoder is enabled (or forced on below for
// format combinations that cannot be converted directly), the conversion is
// dispatched to the worker thread pool; otherwise each row is converted
// inline by one of the row-conversion routines.
void ConvertQuarterFrameToBuffer(DECODER *decoder, TRANSFORM **transform_array, int num_channels,
                                 uint8_t *output, int output_pitch,
                                 FRAME_INFO *info, int precision)
{
    int output_width = info->width;
    int output_height = info->height;
    PIXEL *input_row_ptr[CODEC_MAX_CHANNELS];
    uint8_t *output_row_ptr = output;
    int input_pitch[CODEC_MAX_CHANNELS];
    int channel;
    int row;

    // Value used for filling the fourth channel in ARGB output
    int alpha = 255;

    int format = COLORFORMAT(info);
    int color_space = COLORSPACE(info);
    int decoded_format = DECODEDFORMAT(info);
    //bool inverted = false;

    // Get pointers into the wavelets for each channel
    for (channel = 0; channel < num_channels; channel++)
    {
        // Get the lowpass bands from the wavelets with quarter resolution
        const int wavelet_index = 1;
        IMAGE *wavelet = transform_array[channel]->wavelet[wavelet_index];

        // The wavelet should have been reconstructed
        assert(wavelet != NULL);

        // The lowpass band should be valid
        assert((wavelet->band_valid_flags & BAND_VALID_MASK(0)) != 0);

        // Get the pointers to the first row in each lowpass band
        input_row_ptr[channel] = wavelet->band[0];
        // Convert the pitch from bytes to pixels
        input_pitch[channel] = wavelet->pitch / sizeof(PIXEL);
    }

    // Invert the image if required
    switch (decoded_format)
    {
    case DECODED_FORMAT_RGB24:
    case DECODED_FORMAT_RGB32:
        // Write rows bottom-up by starting at the last row and negating the pitch
        output_row_ptr += (output_height - 1) * output_pitch;
        output_pitch = NEG(output_pitch);
    }

    ComputeCube(decoder);

    //HACK DAN20110122 -- some formats will not directly decode so need to use the AM route
    {
        if( format == COLOR_FORMAT_YU64 ||
            format == COLOR_FORMAT_V210 ||
            format == COLOR_FORMAT_R408 ||
            format == COLOR_FORMAT_V408)
        {
            if( (decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) ||
                (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444))
            {
                // Force the active-metadata (threaded) route for these combinations
                decoder->use_active_metadata_decoder = true;
                decoder->apply_color_active_metadata = true;
            }
        }
    }

    if(decoder->use_active_metadata_decoder)
    {
#if _THREADED
        {
            WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;

#if _DELAY_THREAD_START
            // Create the worker thread pool on first use
            if(decoder->worker_thread.pool.thread_count == 0)
            {
                CreateLock(&decoder->worker_thread.lock);
                // Initialize the pool of transform worker threads
                ThreadPoolCreate(&decoder->worker_thread.pool,
                                 decoder->thread_cntrl.capabilities >> 16/*cpus*/,
                                 WorkerThreadProc,
                                 decoder);
            }
#endif
            // Post a message to the mailbox
            mailbox->output = output_row_ptr;
            mailbox->pitch = output_pitch;
            mailbox->framenum = 0;
            for(channel = 0; channel < num_channels; channel++)
            {
                mailbox->channeldata[channel] = (uint8_t *)input_row_ptr[channel];
                // The worker threads expect the pitch in bytes
                mailbox->channelpitch[channel] = input_pitch[channel]*sizeof(PIXEL);
            }
            memcpy(&mailbox->info, info, sizeof(FRAME_INFO));
            mailbox->jobType = JOB_TYPE_OUTPUT;

            decoder->RGBFilterBufferPhase = 1;

            // Set the work count to the number of rows to process
            ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height);

            // Start the transform worker threads
            ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);

            // Wait for all of the worker threads to finish
            ThreadPoolWaitAllDone(&decoder->worker_thread.pool);

            decoder->RGBFilterBufferPhase = 0;
        }
#endif
    }
    else
    {
        //DAN20081203 -- fix for 444 decodes in AE32-bit float
        decoder->frame.white_point = 16;
        //decoder->frame.signed_pixels = 0;

        // Convert each row to the specified output format
        for (row = 0; row < output_height; row++)
        {
            // Right shift for converting lowpass coefficients to pixels
            int descale = 4;

            // NOTE(review): bit 31 of the format is masked off here —
            // presumably an inversion/flag bit; confirm against COLOR_FORMAT defs
            switch(format & 0x7fffffff)
            {
            case COLOR_FORMAT_YUYV:
            case COLOR_FORMAT_UYVY:
                if( (decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) ||
                    (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444))
                {
                    // assert(0);//need quarter res RGB To YUV decoder
                    ConvertRGB2YUV( input_row_ptr[1], input_row_ptr[0], input_row_ptr[2],
                                    output_width, output_width, output_width,
                                    output_row_ptr, output_pitch,
                                    info->width, 1, 14, info->colorspace, format);
                }
                else
                {
                    // Descale and pack the pixels in each output row
                    CopyQuarterRowToBuffer(input_row_ptr, num_channels, output_row_ptr, output_width,
                                           precision, format);
                }
                break;

            case COLOR_FORMAT_RGB24:
                if( (decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) ||
                    (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444))
                {
                    ConvertRGB48toRGB24(input_row_ptr[1], input_row_ptr[0], input_row_ptr[2],
                                        output_width, output_width, output_width,
                                        output_row_ptr, output_pitch,
                                        info->width, 1, 14, 0);
                }
                else
                {
                    // Convert the intermediate results into a row of RGB24
                    ConvertUnpacked16sRowToRGB24(input_row_ptr, num_channels, output_row_ptr, output_width, descale, format, color_space);
                }
                break;

            case COLOR_FORMAT_RGB32:
            case COLOR_FORMAT_RGB32_INVERTED:
                if( (decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) ||
                    (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444))
                {
                    ConvertRGBA48toRGB32( input_row_ptr[1], input_row_ptr[0], input_row_ptr[2], input_row_ptr[3],
                                          output_width,
                                          output_row_ptr, output_pitch,
                                          info->width, 1, 14, 0, num_channels);
                }
                else
                {
                    // Convert the intermediate results into a row of RGBA32
                    ConvertUnpacked16sRowToRGB32(input_row_ptr, num_channels, output_row_ptr, output_width,
                                                 descale, format, color_space, alpha);
                }
                break;

            case COLOR_FORMAT_YU64:
            case COLOR_FORMAT_V210:
                if( (decoder->codec.encoded_format == ENCODED_FORMAT_RGB_444) ||
                    (decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444))
                {
                    //TODO RGB to YUV Quarter RES DAN20110120 - handle above with HACK DAN20110122
                    // (this combination is routed to the active-metadata path above,
                    // so this branch intentionally does nothing)
                }
                else
                {
                    // Convert the intermediate results into a row of YU64
                    ConvertUnpacked16sRowToYU64(input_row_ptr, num_channels, output_row_ptr, output_width,
                                                descale, precision, format);
                }
                break;

            case COLOR_FORMAT_B64A:
                // Convert the intermediate results to a row of ARGB with 16 bits per pixel
                descale = 2;
                ConvertUnpacked16sRowToB64A(input_row_ptr, num_channels, output_row_ptr, output_width,
                                            descale, precision);
                break;

            case COLOR_FORMAT_R210:
            case COLOR_FORMAT_DPX0:
            case COLOR_FORMAT_RG30:
            case COLOR_FORMAT_AR10:
            case COLOR_FORMAT_AB10:
                // Convert the intermediate results to a row of ARGB with 16 bits per pixel
                descale = 2;
                ConvertUnpacked16sRowToRGB30(input_row_ptr, num_channels, output_row_ptr, output_width,
                                             descale, precision, format, color_space);
                break;

            case COLOR_FORMAT_RG48:
                // Convert the intermediate results into a row of RGBA with 16 bits per component
                descale = 2;
                ConvertUnpacked16sRowToRGB48(input_row_ptr, num_channels, output_row_ptr, output_width,
                                             descale, precision);
                break;

            case COLOR_FORMAT_RG64:
                // Convert the intermediate results into a row of RGBA with 16 bits per component
                descale = 2;
                ConvertUnpacked16sRowToRGBA64(input_row_ptr, num_channels, output_row_ptr, output_width,
                                              descale, precision);
                break;

            default:
                assert(0);
                break;
            }

            // Advance the input row pointers
            for (channel = 0; channel < num_channels; channel++) {
                input_row_ptr[channel] += input_pitch[channel];
            }

            // Advance the output row pointer
            output_row_ptr += output_pitch;
        }
    }
}
| // Release all resources allocated by the decoder | |
| void DecodeRelease(DECODER *decoder, TRANSFORM *transform[], int num_transforms) | |
| { | |
| #if _TIMING && 0 | |
| FILE *logfile = decoder->logfile; | |
| uint32_t frame_count = decoder->frame_count; | |
| if (logfile != NULL && frame_count > 0)\ | |
| { | |
| #ifdef _WIN32 | |
| PrintStatistics(logfile, frame_count, NULL, TIMING_CSV_FILENAME); | |
| #else | |
| PrintStatistics(logfile, frame_count, NULL, NULL); | |
| #endif | |
| } | |
| #endif | |
| // Free the data structures allocated for decoding | |
| ClearDecoder(decoder); | |
| } | |
| void DecodeForceMetadataRefresh(DECODER *decoder) | |
| { | |
| CFHDDATA *cfhddata = &decoder->cfhddata; | |
| cfhddata->force_metadata_refresh = true; | |
| if (decoder->parallelDecoder) { | |
| cfhddata = &decoder->parallelDecoder->cfhddata; | |
| cfhddata->force_metadata_refresh = true; | |
| } | |
| } | |
| void SetDecoderFlags(DECODER *decoder, uint32_t flags) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| // Set the decoder flags | |
| decoder->flags = flags; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Decoder flags: 0x%p\n", decoder->flags); | |
| } | |
| #endif | |
| } | |
| void SetDecoderFormat(DECODER *decoder, int width, int height, int format, int resolution) | |
| { | |
| // Need to modify the codec to use the decoding format | |
| decoder->frame.width = width; | |
| decoder->frame.height = height; | |
| if(format == DECODED_FORMAT_WP13) | |
| { | |
| decoder->frame.output_format = format; | |
| //decoder->frame.format = DECODED_FORMAT_RG48; //TODO Why is this needed with W13A work natively. | |
| decoder->frame.format = format; | |
| //decoder->frame.signed_pixels = 1; | |
| decoder->frame.white_point = 13; | |
| } | |
| else if(format == DECODED_FORMAT_W13A) | |
| { | |
| decoder->frame.output_format = format; | |
| // decoder->frame.format = DECODED_FORMAT_W13A; // TODO eventually this might be DECODED_FORMAT_RG64 | |
| decoder->frame.format = format; | |
| //decoder->frame.signed_pixels = 1; | |
| decoder->frame.white_point = 13; | |
| } | |
| else | |
| { | |
| decoder->frame.output_format = format; | |
| decoder->frame.format = format; | |
| //decoder->frame.signed_pixels = 0; | |
| decoder->frame.white_point = 16; | |
| } | |
| decoder->frame.resolution = resolution; | |
| decoder->frame.pixel_size = PixelSize(decoder->frame.format); | |
| } | |
| void SetDecoderCapabilities(DECODER *decoder) | |
| { | |
| int processor_count; | |
| #ifdef _WIN32 | |
| int limit_cpus = 32; | |
| #else | |
| int limit_cpus = 32; // AJA spins off too many | |
| #endif | |
| // Set the capabilities that are most likely supported by the Intel Mac | |
| decoder->thread_cntrl.capabilities = (_CPU_FEATURE_MMX | _CPU_FEATURE_SSE | _CPU_FEATURE_SSE2); | |
| if (decoder->thread_cntrl.limit) | |
| { | |
| limit_cpus = decoder->thread_cntrl.limit; | |
| } | |
| else if (decoder->thread_cntrl.affinity) | |
| { | |
| int i; | |
| const int max_cpu_count = 32; | |
| limit_cpus = 0; | |
| for (i = 0; i < max_cpu_count; i++) | |
| { | |
| if (decoder->thread_cntrl.affinity & (1<<i)) { | |
| limit_cpus++; | |
| } | |
| } | |
| } | |
| // Set the number of processors | |
| processor_count = GetProcessorCount(); | |
| if(processor_count > limit_cpus) | |
| processor_count = limit_cpus; | |
| #if (0 && DEBUG) | |
| // Set the number of processors (for debugging) | |
| //processor_count = 8; | |
| processor_count = 1; | |
| fprintf(stderr, "Limit processors to %d\n", processor_count); | |
| #endif | |
| decoder->thread_cntrl.capabilities |= (processor_count << 16); | |
| } | |
// Return the decoder capabilities word: SIMD feature flags in the low bits
// and the usable processor count in the upper 16 bits (see SetDecoderCapabilities)
int GetDecoderCapabilities(DECODER *decoder)
{
    return decoder->thread_cntrl.capabilities;
}
| bool SetDecoderColorFlags(DECODER *decoder, uint32_t color_flags) | |
| { | |
| if (/*MIN_DECODED_COLOR_SPACE <= color_flags && */color_flags <= MAX_DECODED_COLOR_SPACE) | |
| { | |
| decoder->frame.colorspace = color_flags; | |
| // Indicate that the color flags were set as specified | |
| return true; | |
| } | |
| // The specified color flags were not valid | |
| return false; | |
| } | |
| // Compute the resolution corresponding to the specified combination of input and output dimensions | |
| int DecodedResolution(int input_width, int input_height, int output_width, int output_height) | |
| { | |
| int decoded_width; | |
| int decoded_height; | |
| // Output height can be negative for inverted RGB | |
| output_height = abs(output_height); | |
| if (output_width == input_width && output_height == input_height) { | |
| return DECODED_RESOLUTION_FULL; | |
| } | |
| // Compute the dimensions for half resolution decoding | |
| decoded_width = input_width / 2; | |
| decoded_height = input_height / 2; | |
| // Do the output dimensions correspond to half resolution decoding? | |
| if (output_width == decoded_width && output_height == decoded_height) { | |
| return DECODED_RESOLUTION_HALF; | |
| } | |
| // Compute the dimensions for quarter resolution decoding | |
| decoded_width /= 2; | |
| decoded_height /= 2; | |
| // Do the output dimensions correspond to half resolution decoding? | |
| if (output_width == decoded_width && output_height == decoded_height) { | |
| return DECODED_RESOLUTION_QUARTER; | |
| } | |
| return DECODED_RESOLUTION_UNSUPPORTED; | |
| } | |
| // Compute the decoded resolution that is closest to the output dimensions | |
| int DecodedScale(int input_width, int input_height, int output_width, int output_height) | |
| { | |
| int decoded_width = input_width; | |
| int decoded_height = input_height; | |
| static int decodedResolution[] = | |
| { | |
| DECODED_RESOLUTION_FULL, | |
| DECODED_RESOLUTION_HALF, | |
| DECODED_RESOLUTION_QUARTER | |
| }; | |
| int reduction = 0; | |
| int max_reduction = 2; | |
| // Output height can be negative for inverted RGB | |
| output_height = abs(output_height); | |
| #if 1 | |
| // Always decode to the next larger size | |
| while (decoded_width > output_width && | |
| decoded_height > output_height && | |
| reduction < max_reduction) | |
| { | |
| // Decode to a frame size that is larger than the output image | |
| int reduced_width = decoded_width / 2; | |
| int reduced_height = decoded_height / 2; | |
| if (reduced_width >= output_width && reduced_height >= output_height) | |
| { | |
| decoded_width = reduced_width; | |
| decoded_height = reduced_height; | |
| reduction++; | |
| } | |
| else | |
| { | |
| break; | |
| } | |
| } | |
| #else | |
| while (decoded_width*4 > output_width*5 && | |
| decoded_height*4 > output_height*5 && | |
| reduction < max_reduction) | |
| { | |
| #if 0 | |
| // Decode to a frame size that is larger than the output image | |
| int reduced_width = decoded_width / 2; | |
| int reduced_height = decoded_height / 2; | |
| if (reduced_width >= output_width && reduced_height >= output_height) | |
| { | |
| decoded_width = reduced_width; | |
| decoded_height = reduced_height; | |
| reduction++; | |
| } | |
| else | |
| { | |
| break; | |
| } | |
| #else | |
| // Better to scale up a smaller image than scale down a larger image | |
| decoded_width /= 2; | |
| decoded_height /= 2; | |
| reduction++; | |
| #endif | |
| } | |
| #endif | |
| // Check that the decoded resolution is valid | |
| assert(0 <= reduction && reduction <= max_reduction); | |
| return decodedResolution[reduction]; | |
| } | |
| void ComputeDecodedDimensions(int encoded_width, int encoded_height, int decoded_resolution, | |
| int *decoded_width_out, int *decoded_height_out) | |
| { | |
| switch (decoded_resolution) | |
| { | |
| default: | |
| assert(0); | |
| case DECODED_RESOLUTION_FULL: | |
| *decoded_width_out = encoded_width; | |
| *decoded_height_out = encoded_height; | |
| break; | |
| case DECODED_RESOLUTION_HALF: | |
| *decoded_width_out = encoded_width / 2; | |
| *decoded_height_out = encoded_height / 2; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| *decoded_width_out = encoded_width / 4; | |
| *decoded_height_out = encoded_height / 4; | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| //TODO: Check that the lowpass dimensions are correct | |
| *decoded_width_out = encoded_width / 8; | |
| *decoded_height_out = encoded_height / 8; | |
| break; | |
| } | |
| } | |
| // Return true if the specified resolution is supported | |
| bool IsDecodedResolution(int resolution) | |
| { | |
| if (resolution == DECODED_RESOLUTION_QUARTER) { | |
| return true; | |
| } | |
| return (resolution == DECODED_RESOLUTION_FULL || | |
| resolution == DECODED_RESOLUTION_HALF); | |
| } | |
| // Return true if the encoded sample is a key frame | |
| bool IsSampleKeyFrame(uint8_t *sample, size_t size) | |
| { | |
| bool key_frame_flag = false; | |
| // Search the first twenty tags for the sample type | |
| const int num_tags = 20; | |
| int i; | |
| BITSTREAM bitstream; | |
| InitBitstreamBuffer(&bitstream, sample, size, BITSTREAM_ACCESS_READ); | |
| for (i = 0; i < num_tags && size > 0; i++, size -= sizeof(TAGVALUE)) | |
| { | |
| TAGVALUE segment = GetSegment(&bitstream); | |
| if (segment.tuple.tag == CODEC_TAG_SAMPLE) | |
| { | |
| switch (segment.tuple.value) | |
| { | |
| case SAMPLE_TYPE_GROUP: | |
| case SAMPLE_TYPE_FIRST: | |
| case SAMPLE_TYPE_IFRAME: | |
| key_frame_flag = true; | |
| break; | |
| case SAMPLE_TYPE_SEQUENCE_HEADER: | |
| case SAMPLE_TYPE_FRAME: | |
| case SAMPLE_TYPE_SECOND: | |
| case SAMPLE_TYPE_PFRAME: | |
| default: | |
| key_frame_flag = false; | |
| break; | |
| case SAMPLE_TYPE_GROUP_TRAILER: | |
| case SAMPLE_TYPE_NONE: | |
| case SAMPLE_TYPE_ERROR: | |
| case SAMPLE_TYPE_CHANNEL: | |
| assert(0); // Unexpected situation | |
| key_frame_flag = false; // Report the sample as a non-key frame | |
| break; | |
| } | |
| break; // Found the sample type | |
| } | |
| } | |
| return key_frame_flag; | |
| } | |
| // Return the number of the more recent decoded frame | |
| uint32_t DecodedFrameNumber(DECODER *decoder) | |
| { | |
| CODEC_STATE *codec = &decoder->codec; | |
| if (decoder == NULL) return 0; | |
| return codec->frame_number; | |
| } | |
| /***** Start of the new code for the finite state machine (FSM) decoder *****/ | |
| #if _PROCESSOR_DISPATCH | |
// Dispatch stub: with the Intel compiler's cpu_dispatch attribute this body
// is never executed; calls are routed at run time to the cpu_specific
// (Generic or Pentium_4) implementations defined below.
__declspec(cpu_dispatch(Pentium_4,Generic))
static inline void ZeroHighPassRow(PIXEL *rowptr, int length)
{
    // Stub routine for processor specific dispatch
}
| #endif | |
| #if _PROCESSOR_GENERIC | |
| #if _PROCESSOR_DISPATCH | |
| __declspec(cpu_specific(Generic)) | |
| #endif | |
// This version assumes that the row is a multiple of 8 bytes
// Zero 'length' bytes starting at rowptr using 8-byte MMX stores.
// NOTE(review): MSVC-style 32-bit inline assembly; not portable to x64 builds.
static inline void ZeroHighPassRow(PIXEL *rowptr, int length)
{
    int count;

    // Check that the row starts on a 16-byte boundary
    //assert(ISALIGNED(rowptr, 16));

    // Check that the row length (in bytes) is a multiple of 8 byte blocks
    assert(ISALIGNED(length, 8));

    // Convert the length from pixels to 8-byte blocks
    count = (length >> 3);

    // This code assumes that at least one 8-byte block will be zeroed
    assert(count > 0);

    __asm
    {
        pxor mm0, mm0       // Zero a 16 byte register
        mov eax, rowptr     // Load the pointer to the memory block
        mov ebx, count      // Load the count of 8-byte blocks
    loop: movq [eax], mm0   // Write 8 bytes of zeros
        add eax, 8          // Advance to the next 8 byte block
        sub ebx, 1          // Decrement the number of blocks
        jg loop
    }

    // NOTE(review): the emms instruction (_mm_empty) after MMX code is
    // commented out — presumably safe because no x87 float code follows,
    // but confirm before reusing this routine elsewhere.
    //_mm_empty();
}
| #endif | |
| #if _PROCESSOR_PENTIUM_4 | |
| #if _PROCESSOR_DISPATCH | |
| __declspec(cpu_specific(Pentium_4)) | |
| #endif | |
#ifndef _WIN64
// This version assumes that the row is a multiple of 16 bytes
// 32-bit build: the SSE2 assembly has been replaced by memset (see #if 1 below)
static inline void ZeroHighPassRow(PIXEL *rowptr, int length)
{
    int count;

    // Check that the row starts on a 16-byte boundary
    assert(ISALIGNED(rowptr, 16));

    // Check that the row length (in bytes) is a multiple of 16 byte blocks
    assert(ISALIGNED(length, 16));

    // Convert the length from pixels to 16-byte blocks
    count = (length >> 4);

    // This code assumes that at least one 16-byte block will be zeroed
    assert(count > 0);

#if 1 //DANREMOVE
    // The assembly loop below was replaced by memset; count is now only
    // used by the assert above
    memset(rowptr, 0, length);
#else
    __asm
    {
        pxor xmm0, xmm0     // Zero a 16 byte register
        mov eax, rowptr     // Load the pointer to the memory block
        mov ebx, count      // Load the count of 16-byte blocks
    loop: movdqa [eax], xmm0 // Write 16 bytes of zeros
        add eax, 16         // Advance to the next 16 byte block
        sub ebx, 1          // Decrement the number of blocks
        jg loop
    }
#endif
}
#else
// This version assumes that the row is a multiple of 16 bytes
// 64-bit build: MSVC does not support inline assembly, so use memset
static inline void ZeroHighPassRow(PIXEL *rowptr, int length)
{
    // Check that the row starts on a 16-byte boundary
    assert(ISALIGNED(rowptr, 16));

    // Check that the row length (in bytes) is a multiple of 16 byte blocks
    assert(ISALIGNED(length, 16));

    memset(rowptr, 0, length);
}
#endif
| #endif | |
| #if (0 && _DEBUG) | |
// Functions for the finite state machine decoder (debug version)
// These are compiled only when the surrounding "#if (0 && _DEBUG)" is enabled.
static FSMENTRY *GetFSMTableEntry(FSM *fsm, int index)
{
    // Return the address of the next table entry in the finite state machine
    return &fsm->next_state[index];
}
static void ResetFSM(FSM *fsm)
{
    // Reset the state to the beginning of the finite state machine entries
    // NOTE(review): this disabled debug path reads fsm->entries, while the
    // release macros below use fsm->table.entries — verify the FSM struct
    // layout before enabling this code.
    fsm->next_state = fsm->entries;
}
static void UpdateFSM(FSM *fsm, int next)
{
    // Change the state pointer to the next block of table entries
    // NOTE(review): uses fsm->entries, unlike the release macros which use
    // fsm->table.entries — verify before enabling this debug path.
    fsm->next_state = fsm->entries + (next << FSM_INDEX_SIZE);
}
| #else | |
| // Macros for the finite state machine decoder | |
| #if _INDIVIDUAL_LUT | |
| #define GetFSMTableEntry(fsm, index) (FSMENTRY *)fsm->next_state+index | |
| #define ResetFSM(fsm) fsm->next_state = fsm->table.entries[0] | |
| #define UpdateFSM(fsm, next) fsm->next_state = fsm->table.entries[next] | |
| #define GetFSMTableEntryIndividual(fsm, index) (FSMENTRY *)fsm->table.entries_ind[(fsm->next_state_index << FSM_INDEX_SIZE) | index] | |
| #define ResetFSMIndividual(fsm) fsm->next_state_index = 0 | |
| #define UpdateFSMIndividual(fsm, next) fsm->next_state_index = next | |
| #else | |
| #define GetFSMTableEntry(fsm, index) (FSMENTRY *)fsm->next_state+index | |
| #define ResetFSM(fsm) fsm->next_state = fsm->table.entries | |
| #define UpdateFSM(fsm, next) fsm->next_state = fsm->table.entries+((int)next << FSM_INDEX_SIZE) | |
| #endif | |
| #endif | |
| #if _DEBUG | |
// Decode one FSM entry into locals.  The values are never used or printed —
// presumably intended for inspection in a debugger (set a breakpoint here
// and watch pre_skip/post_skip/value0/value1/index) — confirm before removal.
static void DebugOutputFSMEntry(FSM *fsm, int index, FSMENTRY *entry)
{
    int pre_skip = (entry->pre_post_skip & 0xFFF);  // low 12 bits
    int post_skip = (entry->pre_post_skip >> 12);   // remaining high bits

    // Remove companding
    int value0 = entry->value0 / 32;
    int value1 = entry->value1 / 32;

    // Convert the index to start at the beginning of the table
    index += (int)(fsm->next_state - fsm->table.entries[0]);
}
// Decode one packed (fast-format) FSM entry into locals.  As with
// DebugOutputFSMEntry, the values are unused — presumably for debugger
// inspection; confirm before removal.
static void DebugOutputFSMEntryFast(FSM *fsm, int index, FSMENTRYFAST *entry)
{
    int pre_skip = (entry->pre_post_skip & 0xFFF);  // low 12 bits
    int post_skip = (entry->pre_post_skip >> 12);   // remaining high bits

    // Remove companding; both values are packed into one 32-bit word
    int value0 = (entry->values >> 16) / 32;
    int value1 = (entry->values & 0xFFFF) / 32;

    // Convert the index to start at the beginning of the table
    index += (int)(fsm->next_state - fsm->table.entries[0]);
}
// Walk the first block of FSM entries, decoding each into locals.
// Nothing is output — presumably for debugger inspection; confirm.
static void DebugOutputFSM(FSM *fsm)
{
    int num_entries = FSM_INDEX_ENTRIES;
    int i;

    for (i = 0; i < num_entries; i++)
    {
        FSMENTRY *entry = &fsm->table.entries[0][i];
        int pre_skip = (entry->pre_post_skip & 0xFFF);  // low 12 bits
        int post_skip = (entry->pre_post_skip >> 12);   // remaining high bits
    }
}
| static void PrintFSMEntry(FSM *fsm, int index, FSMENTRY *entry, FILE *logfile) | |
| { | |
| int pre_skip = (entry->pre_post_skip & 0xFFF); | |
| int post_skip = (entry->pre_post_skip >> 12); | |
| // Remove companding | |
| int value0 = entry->value0 / 32; | |
| int value1 = entry->value1 / 32; | |
| // Convert the index to start at the beginning of the table | |
| index += (int)(fsm->next_state - fsm->table.entries[0]); | |
| if (logfile) { | |
| fprintf(logfile, "%d, %d, %d, %d, %d\n", index, value0, value1, pre_skip, post_skip); | |
| } | |
| } | |
| static void PrintFSMEntryFast(FSM *fsm, int index, FSMENTRYFAST *entry, FILE *logfile) | |
| { | |
| int pre_skip = (entry->pre_post_skip & 0xFFF); | |
| int post_skip = (entry->pre_post_skip >> 12); | |
| // Remove companding | |
| int value0 = (entry->values >> 16) / 32; | |
| int value1 = (entry->values & 0xFFFF) / 32; | |
| // Convert the index to start at the beginning of the table | |
| index += (int)(fsm->next_state - fsm->table.entries[0]); | |
| if (logfile) { | |
| fprintf(logfile, "%d, %d, %d, %d, %d\n", index, value0, value1, pre_skip, post_skip); | |
| } | |
| } | |
| #endif | |
| static inline int GetFastByte(BITSTREAM *stream) | |
| { | |
| // Inline of the third case of GetByte | |
| uint8_t *lpCurrentWord = stream->lpCurrentWord; | |
| // Get the next byte from the bitstream | |
| int byte = (uint32_t )(*(lpCurrentWord++)); | |
| // Update the state of the bitstream | |
| stream->lpCurrentWord = lpCurrentWord; | |
| #if ERROR_TOLERANT | |
| // Update the count of bytes used | |
| stream->nWordsUsed--; | |
| #endif | |
| // Check that the high bits are zero | |
| assert((byte & ~BITMASK(8)) == 0); | |
| return byte; | |
| } | |
#if 0
// Fetch the next two bytes from the bitstream and return them as a
// big-endian 16-bit value (adaptation of the code in GetByte).
static inline int GetFastShort(BITSTREAM *stream)
{
    uint8_t *ptr = stream->lpCurrentWord;

    // Combine the next two bytes, most significant byte first
    int word = ((int)(uint32_t)ptr[0] << 8) | (int)(uint32_t)ptr[1];

    // Consume the two bytes
    stream->lpCurrentWord = ptr + 2;

    // Check that the high bits are zero
    assert((word & ~BITMASK(16)) == 0);

    return word;
}
#endif
| // Must declare the byte swap function even though it is an intrinsic | |
| //int _bswap(int); | |
#if 0
// Fetch the next four bytes from the bitstream as a 32-bit word, converting
// from big-endian bitstream order to the native byte order.
// NOTE(review): this dereferences the byte pointer as uint32_t, which assumes
// the current position is 4-byte aligned and type-puns through an
// incompatible pointer — undefined behavior on some targets; confirm before
// re-enabling this code.
static inline int GetFastLong(BITSTREAM *stream)
{
    uint32_t *lpCurrentWord = (uint32_t *)stream->lpCurrentWord;
    int word = *(lpCurrentWord)++;
    //word = _bswap(word);
    word = SwapInt32BtoN(word);
    stream->lpCurrentWord = (uint8_t *)lpCurrentWord;
    return word;
}
#endif
#if 0 //DAN20041030 not used
// Decode a subband using FSM. One byte is read from the bitstream each time and decoded in two steps
// Original version that does not use a separate buffer for decoding
//
// Writes dequantized, saturated coefficients directly into the image rows;
// returns true once the band end trailer is decoded (remaining rows are
// zeroed). NOTE(review): bytes_row_size is computed but never used.
bool DecodeBandFSM(FSM *fsm, BITSTREAM *stream, PIXEL *image, int width, int height, int pitch, int quantization)
{
    int index, byte;
    FSMENTRY *entry;
    PIXEL *rowptr = image;
    int column = 0;
    int32_t value;
    size_t bytes_row_size = width * sizeof(PIXEL);
    PIXEL *maxptr;
    int length = width * sizeof(PIXEL);
    //ROI roi = {width, 1};

    // This version of Huffman decoder assumes that one byte
    // is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL);

    // Compute the address of the row after the last row in the band
    maxptr = rowptr + height * pitch;

    // Round up the row length (in bytes) to a multiple of 16 bytes
    length = ALIGN16(length);
#if (0 && DEBUG)
    zerorow_count = 0;
#endif
    ZeroHighPassRow(rowptr, length);

    // Decode runs and magnitude values until the band end trailer is decoded
    for (;;)
    {
        // Read a byte from the bitstream
        byte = GetFastByte(stream);

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return when the entire band is decoded
        if (entry->value0 == BAND_END_TRAILER) {
            // Zero out the whole subband from here on
            rowptr += pitch;
            while(rowptr < maxptr) {
                ZeroHighPassRow(rowptr, length);
                rowptr += pitch;
            }
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // If no magnitude value is decoded
        if (entry->value0 == 0) {
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
        }
        // If there is only one decoded magnitude value
        else if(entry->value1 == 0) {
            // Undo quantization and scaling
            value = quantization * entry->value0;
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
            // Fill in the decoded magnitude
            // Check the column before storing the value
            //assert(index < width);
            assert(0 <= column && column < width);
            // Store the saturated value at the position found in the scan
            rowptr[column] = SATURATE(value);
            column += entry->post_skip;
            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
        }
        // If there are two decoded magnitude values
        else
        {
            // Check the column before storing values
            assert(0 <= column && column < width);
            if(column < width-1) {
                value = quantization * entry->value0;
                rowptr[column++] = SATURATE(value);
                value = quantization * entry->value1;
                rowptr[column++] = SATURATE(value);
            }
            else {
                value = quantization * entry->value0;
                rowptr[column] = SATURATE(value);
                value = quantization * entry->value1;
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
                column = 0;
                // NOTE(review): no rowptr < maxptr check before this store —
                // possible out-of-bounds write if the band ends exactly here;
                // presumably the encoder never emits a value pair at the last
                // pixel of the band. Confirm before re-enabling this code.
                rowptr[column++] = SATURATE(value);
            }
        }

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER) {
            // Zero out the whole subband from here on
            rowptr += pitch;
            while(rowptr < maxptr) {
                ZeroHighPassRow(rowptr, length);
                rowptr += pitch;
            }
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // If no magnitude value is decoded
        if (entry->value0 == 0) {
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
        }
        // If there is only one decoded magnitude value
        else if (entry->value1 == 0) {
            // Undo quantization and scaling
            // NOTE(review): this declaration shadows the outer 'value' local
            int32_t value = quantization * entry->value0;
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
            // Fill in the decoded magnitude
            // Check the column before storing the value
            //assert(index < width);
            assert(0 <= column && column < width);
            // Store the saturated value at the position found in the scan
            rowptr[column] = SATURATE(value);
            column += entry->post_skip;
            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
            }
        }
        // If there are two decoded magnitude values
        else
        {
            // Check the column before storing values
            assert(0 <= column && column < width);
            if(column < width-1) {
                value = quantization * entry->value0;
                rowptr[column++] = SATURATE(value);
                value = quantization * entry->value1;
                rowptr[column++] = SATURATE(value);
            }
            else {
                value = quantization * entry->value0;
                rowptr[column] = SATURATE(value);
                value = quantization * entry->value1;
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, length);
                column = 0;
                // NOTE(review): same unchecked store as the first chunk above
                rowptr[column++] = SATURATE(value);
            }
        }
    }
}
#endif
| // Decode a subband of highpass coefficients using a finite state machine. | |
| // One byte is read from the bitstream each time and decoded in two steps. | |
| // New version that uses a buffer aligned to the cache for decoding. | |
#if 0
// Zero a block of memory one cache line (64 bytes) at a time using aligned
// SSE2 stores (memset on GCC builds). The pointer must be cache-line aligned
// and the count must be at least one.
static inline void ZeroHighPassBuffer(PIXEL *ptrCacheLines, int numCacheLines)
{
    // This routine assume that the cache line size is 64 bytes
    assert(_CACHE_LINE_SIZE == 64);

    // This routine assumes that the input pointer is aligned to a cache line
    assert(ISALIGNED(ptrCacheLines, _CACHE_LINE_SIZE));

    // This routine assumes that at least one cache line will be written
    assert(numCacheLines > 0);
#if __GNUC__
    memset(ptrCacheLines, 0, numCacheLines * _CACHE_LINE_SIZE);
#else
    // MSVC x86 inline assembly: zero 64 bytes per iteration with xmm stores
    __asm
    {
        pxor xmm0, xmm0 // Zero a 16 byte register
        mov eax, ptrCacheLines // Load the pointer to the memory block
        mov ebx, numCacheLines // Load the count of the number of cache lines
        loop: movdqa [eax], xmm0 // Write 64 bytes of zeros using aligned stores
        movdqa [eax+16], xmm0
        movdqa [eax+32], xmm0
        movdqa [eax+48], xmm0
        add eax, 64 // Advance to the next cache line
        sub ebx, 1 // Decrement the number of cache lines
        jg loop
    }
#endif
    // The routine returns the pointer to the cache line after zeroing the block
}
#endif
#if 0
// Copy a row of decoded coefficients from the cache-aligned buffer to the
// destination row using non-temporal SSE2 stores (memcpy on GCC builds).
// The length is in bytes and must be a positive multiple of 16.
static inline void CopyRowBuffer(char *rowptr, PIXEL *buffer, int length)
{
    // Note that the length is in units of bytes (not pixels)
    int count; // Number of 16-byte blocks to copy

    // Check that the row length is an integer multiple of 16-byte blocks
    assert(ISALIGNED(length, 16));

    // Convert the row length to the number of 16-byte blocks to copy
    count = length >> 4;

    // This routine assumes that at least one 16 byte block will be copied
    assert(count > 0);
#if __GNUC__
    // Use standard memory copy
    memcpy(rowptr, buffer, length);
#else
    // Copy a multiple of 16 byte blocks
    __asm
    {
        mov eax, rowptr // Load the pointer to the destination
        mov ebx, buffer // Load the pointer to the source
        mov ecx, count // Load the number of 16-byte blocks to copy
        loop: movdqa xmm0, [ebx] // Load 16 bytes from the source
        movntdq [eax], xmm0 // Copy 16 bytes to the destination
        add eax, 16 // Advance to the group of 16 bytes
        add ebx, 16
        sub ecx, 1 // Decrement the number of blocks to copy
        jg loop
    }
#endif
}
#endif
| // DecodeBandFSMBuffered is no longer used | |
#if 0 //dan20041030 not used
// Variant of DecodeBandFSM that decodes each row into a cache-aligned
// scratch buffer and copies the completed row to the image with
// non-temporal stores. Returns true once the band end trailer is decoded.
bool DecodeBandFSMBuffered(FSM *fsm, BITSTREAM *stream, PIXEL *image,
    int width, int height, int pitch,
    int quantization, char *decoding_buffer, size_t decoding_buffer_size)
{
    char *rowptr = (char *)image; // Pointer to current row
    char *maxptr = rowptr + height * pitch; // Address of row after the last row
    FSMENTRY *entry;
    int index;
    int byte;
    int column = 0;
    int32_t value;
    size_t row_size;
    size_t cache_row_size; // Size of a row in bytes
    int cache_line_count; // Size of the buffer in cache lines
    PIXEL *buffer; // Pixel pointer to the buffer
    int length; // Length of row in bytes

    // Check that the processing size allows two chunks per byte
    assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2);

    // The bitstream buffer should be empty
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    // Compute the number of cache lines used in the buffer
    row_size = width * sizeof(PIXEL);
    cache_row_size = ALIGN(row_size, _CACHE_LINE_SIZE);
    cache_line_count = (cache_row_size >> _CACHE_LINE_SHIFT);

    // Check that the buffer is large enough
    assert(decoding_buffer != NULL && decoding_buffer_size >= cache_row_size);

    // Check that the buffer starts on a cache line boundary
    assert(ISALIGNED(decoding_buffer, _CACHE_LINE_SIZE));

    // This routine assumes that the rows are contiguous and the pitch is a multiple of 16 bytes
    length = pitch;
    assert(length == ALIGN(row_size, 16));

    // Cast the buffer pointer for pixel access
    buffer = (PIXEL *)decoding_buffer;

    // Zero the decoding buffer
    ZeroHighPassBuffer(buffer, cache_line_count);

    // Decode runs and magnitude values until the band end trailer is decoded
    for (;;)
    {
        // Read a byte from the bitstream
        byte = GetFastByte(stream);

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return when the entire band is decoded
        if (entry->value0 == BAND_END_TRAILER)
        {
            // Copy the buffer to the row if not already beyond the band
            if (rowptr < maxptr) CopyRowBuffer(rowptr, buffer, length);
            // Advance to the next row
            rowptr += pitch;
            // Zero the remaining rows in the subband
            while (rowptr < maxptr) {
                ZeroHighPassRow((PIXEL *)rowptr, length);
                rowptr += pitch;
            }
            // Reset the finite state machine to the root node in the Huffman tree
            ResetFSM(fsm);
            // Return indication that the band was fully decoded
            return true;
        }

        // Set the finite state machine to the next state in the Huffman tree
        UpdateFSM(fsm, entry->next_state);

        // No magnitude values decoded?
        if (entry->value0 == 0)
        {
            // No magnitudes decoded so just advance the column pointer
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
        }
        // Only one magnitude value decoded?
        else if (entry->value1 == 0)
        {
            // Process the magnitude value that was decoded
            // Undo quantization and scaling
            value = quantization * entry->value0;
            // Advance to the column where the value should be placed
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
            // Fill in the decoded magnitude
            // Check the column before storing the value
            assert(0 <= column && column < width);
            // Store the saturated value at the position found in the scan
            buffer[column] = SATURATE(value);
            column += entry->post_skip;
            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
        }
        else // Two magnitude values were decoded
        {
            // Check the column before storing values
            assert(0 <= column && column < width);
            if (column < width - 1) {
                // Dequantize and store the first value
                value = quantization * entry->value0;
                buffer[column++] = SATURATE(value);
                // Dequantize and store the second value
                value = quantization * entry->value1;
                buffer[column++] = SATURATE(value);
            }
            else {
                // Dequantize and store the first value in the current row
                value = quantization * entry->value0;
                buffer[column] = SATURATE(value);
                // Dequantize the second value
                value = quantization * entry->value1;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
                // Reset the column to the beginning of the row
                column = 0;
                // Store the second value in the new row
                // (note: the store targets the scratch buffer, so unlike the
                // unbuffered variant this cannot write past the image)
                buffer[column++] = SATURATE(value);
            }
        }

        // Decode the second 4-bit chunk
        index = byte & FSM_INDEX_MASK;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            // Copy the buffer to the row if not already beyond the band
            if (rowptr < maxptr) CopyRowBuffer(rowptr, buffer, length);
            // Advance to the next row
            rowptr += pitch;
            // Zero the remaining rows in the subband
            while (rowptr < maxptr) {
                ZeroHighPassRow((PIXEL *)rowptr, length);
                rowptr += pitch;
            }
            // Reset the finite state machine to the root node in the Huffman tree
            ResetFSM(fsm);
            // Return indication that the band was fully decoded
            return true;
        }

        // Set the finite state machine to the next state in the Huffman tree
        UpdateFSM(fsm, (int)entry->next_state);

        // If no magnitude value is decoded
        if (entry->value0 == 0) {
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
        }
        // If there is only one decoded magnitude value
        else if (entry->value1 == 0) {
            // Undo quantization and scaling
            // NOTE(review): this declaration shadows the outer 'value' local
            int32_t value = quantization * entry->value0;
            column += entry->pre_skip;
            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
            // Fill in the decoded magnitude
            // Check the column before storing the value
            //assert(index < width);
            assert(0 <= column && column < width);
            // Store the saturated value at the position found in the scan
            buffer[column] = SATURATE(value);
            column += entry->post_skip;
            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
            }
        }
        // If there are two decoded magnitude values
        else
        {
            // Check the column before storing values
            assert(0 <= column && column < width);
            if (column < width-1) {
                value = quantization * entry->value0;
                buffer[column++] = SATURATE(value);
                value = quantization * entry->value1;
                buffer[column++] = SATURATE(value);
            }
            else {
                value = quantization * entry->value0;
                buffer[column] = SATURATE(value);
                value = quantization * entry->value1;
                // Advance to the next row
                assert(rowptr < maxptr);
                CopyRowBuffer(rowptr, buffer, length);
                rowptr += pitch;
                // Zero the decoding buffer if there are more rows to process
                if (rowptr < maxptr) ZeroHighPassBuffer(buffer, cache_line_count);
                // Reset the column to the beginning of the row
                column = 0;
                buffer[column++] = SATURATE(value);
            }
        }
    }
}
#endif
#if 0 //dan20041030 not used
// Decode a subband using FSM, combine the two results decoded from one byte
//
// Both FSM lookups for the byte are performed up front, then the combined
// skips and values are applied in one pass.
// NOTE(review): unlike DecodeBandFSM, this variant never converts pitch from
// bytes to pixel units and passes width (pixels) instead of a byte length to
// ZeroHighPassRow — verify these units before re-enabling this code.
// NOTE(review): row, skip and bytes_row_size are declared but never used.
bool DecodeBandFSMCombined(FSM *fsm, BITSTREAM *stream, PIXEL *image, int width, int height, int pitch, int quantization)
{
    int index, skip;
    uint8_t byte;
    FSMENTRY *entry1, *entry2;
    PIXEL *rowptr = image;
    int row = 0, column = 0;
    int32_t value,bytes_row_size = width*sizeof(PIXEL);
    PIXEL *maxptr = rowptr + height*pitch;

    // This Huffman decoder assumes each byte is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2);

    ZeroHighPassRow(rowptr, width);

    // Double check that the bitstream buffer is empty
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    // Decode runs and magnitude values until the band end trailer is decoded
    for (;;)
    {
        // Read a byte from the bitstream
        //byte = GetBits(stream, BITSTREAM_WORD_SIZE);
#if 0
        byte = GetByte(stream);
        if (stream->error != BITSTREAM_ERROR_OKAY) {
            stream->error = VLC_ERROR_NOTFOUND;
            return false;
        }
#else
        // Inline of the third case of GetByte
        uint8_t *lpCurrentWord = stream->lpCurrentWord;

        // Get the next byte from the bitstream
        byte = (uint32_t )(*(lpCurrentWord++));

        // Update the state of the bitstream
        stream->lpCurrentWord = lpCurrentWord;

        // Check that the high bits are zero
        assert((byte & ~BITMASK(8)) == 0);
#endif
        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;
        entry1 = GetFSMTableEntry(fsm, index);
        UpdateFSM(fsm, entry1->next_state);

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);
        entry2 = GetFSMTableEntry(fsm, index);
        UpdateFSM(fsm, entry2->next_state);

        // Return when the subband is completely decoded
        if(entry1->value0 == BAND_END_TRAILER || entry2->value0 == BAND_END_TRAILER) {
            ResetFSM(fsm);
            return true;
        }

        // If no magnitude value is decoded at the first step
        if (entry1->value0 == 0) {
            // If no magnitude is decoded at the second step
            if(entry2->value0 == 0) {
                column += entry1->pre_skip+entry2->pre_skip;
                // Did the scan go beyond the end of the row?
                while (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
            // If one magnitude is decoded at the second step
            else if(entry2->value1 == 0) {
                // Skip to the non-zero position
                column += entry1->pre_skip+entry2->pre_skip;
                // Did the scan go beyond the end of the row?
                while (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
                // Fill in the decoded magnitude
                // Undo quantization and scaling
                value = quantization * entry2->value0;
                // Check the column before storing the value
                //assert(index < width);
                assert(0 <= column && column < width);
                // Store the saturated value
                rowptr[column] = SATURATE(value);
                column += entry2->post_skip;
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
            // If two magnitudes are decoded at the second step
            else {
                column += entry1->pre_skip;
                // Did the scan go beyond the end of the row?
                while (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
                // Check the column before storing values
                assert(0 <= column && column < width);
                if(column < width-1) {
                    value = quantization * entry2->value0;
                    rowptr[column++] = SATURATE(value);
                    value = quantization * entry2->value1;
                    rowptr[column++] = SATURATE(value);
                }
                else {
                    value = quantization * entry2->value0;
                    rowptr[column] = SATURATE(value);
                    value = quantization * entry2->value1;
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                    column = 0;
                    // NOTE(review): no rowptr < maxptr check before this store —
                    // possible out-of-bounds write if the band ends exactly here
                    rowptr[column++] = SATURATE(value);
                }
            }
        }
        // If only one magnitude is decoded at the first step
        else if(entry1->value1 == 0) {
            // Undo quantization and scaling
            value = quantization * entry1->value0;
            column += entry1->pre_skip;
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;
                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
            }
            // Fill in the decoded magnitude
            // Check the column before storing the value
            //assert(index < width);
            assert(0 <= column && column < width);
            // Store the saturated value at the position found in the scan
            rowptr[column] = SATURATE(value);

            // If no magnitude is decoded at the second step
            if(entry2->value0 == 0) {
                column += entry1->post_skip+entry2->pre_skip;
                // Did the scan go beyond the end of the row?
                while (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
            // If one magnitude is decoded at the second step
            else if (entry2->value1 == 0)
            {
                // Undo quantization and scaling
                value = quantization * entry2->value0;
                column += entry1->post_skip+entry2->pre_skip;
                // Did the scan go beyond the end of the row?
                while (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
                // Fill in the decoded magnitude
                // Check the column before storing the value
                assert(0 <= column && column < width);
                // Store the saturated value at the position found in the scan
                rowptr[column] = SATURATE(value);
                column += entry2->post_skip;
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
            // If two magnitudes are decoded at the second step
            else
            {
                column += entry1->post_skip;
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
                // Check the column before storing values
                assert(0 <= column && column < width);
                if(column < width-1) {
                    value = quantization * entry2->value0;
                    rowptr[column++] = SATURATE(value);
                    value = quantization * entry2->value1;
                    rowptr[column++] = SATURATE(value);
                }
                else {
                    value = quantization * entry2->value0;
                    rowptr[column] = SATURATE(value);
                    value = quantization * entry2->value1;
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                    column = 0;
                    // NOTE(review): same unchecked store as above
                    rowptr[column++] = SATURATE(value);
                }
            }
        }
        // If two magnitudes are decoded at the first step
        else {
            // Check the column before storing values
            assert(0 <= column && column < width);
            if(column < width-1) {
                value = quantization * entry1->value0;
                rowptr[column++] = SATURATE(value);
                value = quantization * entry1->value1;
                rowptr[column++] = SATURATE(value);
            }
            else {
                value = quantization * entry1->value0;
                rowptr[column] = SATURATE(value);
                value = quantization * entry1->value1;
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                column = 0;
                // NOTE(review): same unchecked store as above
                rowptr[column++] = SATURATE(value);
            }

            // If two magnitudes are decoded at the first step
            // then at most one more magnitude can be decoded at the second step
            assert(entry2->value1 == 0);

            // If no magnitude is decoded at the second step
            if(entry2->value0 == 0) {
                column += entry2->pre_skip; // entry2->pre_skip <=4 must be true
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if(rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
            // If one magnitude is decoded at the second step
            else {
                column += entry2->pre_skip; // must be a small zero run
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if (rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
                // Fill in the decoded magnitude
                // Undo quantization and scaling
                value = quantization * entry2->value0;
                // Check the column before storing the value
                assert(0 <= column && column < width);
                // Store the saturated value at the position found in the scan
                rowptr[column] = SATURATE(value);
                column += entry2->post_skip;
                // Did the scan go beyond the end of the row?
                if (column >= width)
                {
                    // Compute the starting column for the next row
                    column -= width;
                    // Advance to the next row
                    rowptr += pitch;
                    if (rowptr < maxptr) ZeroHighPassRow(rowptr, width);
                }
            }
        }
    }
}
#endif
| #if 0 //dan20041030 not used | |
| // Decode a subband using FSM. One byte is read from the bitstream each time and decoded in two steps | |
| // Original version that does not use a separate buffer for decoding | |
// Decode an entire 8-bit subband from the bitstream using the FSM (finite
// state machine) Huffman decoder.  Each byte read from the stream is split
// into two 4-bit chunks; each chunk indexes the FSM table and yields a
// pre-skip zero run, up to two magnitude values, and a post-skip.  Decoding
// continues until the band-end trailer code is seen, at which point the
// remainder of the band is zeroed and the FSM is reset.
//
// The decoder tracks an explicit (rowptr, column) position; a zero run may
// wrap across the end of a row, in which case the next row is zeroed before
// values are stored into it.  Returns true on completion.
//
// NOTE(review): this function sits inside "#if 0 //dan20041030 not used"
// and is compiled out; it is retained for reference only.
bool DecodeBandFSM8s(FSM *fsm, BITSTREAM *stream, PIXEL8S *image, int width, int height, int pitch)
{
    int index, byte;
    FSMENTRY *entry;
    PIXEL8S *rowptr = image;        // current output row
    int column = 0;                 // current column within the row
    int32_t value;
    PIXEL8S *maxptr;                // address one past the last row in the band
    int length = width * sizeof(PIXEL8S);   // row length in bytes (before alignment)
    //ROI roi = {width, 1};

    // This version of Huffman decoder assumes that one byte
    // is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL8S);

    // Compute the address of the row after the last row in the band
    maxptr = rowptr + height * pitch;

    // Round up the row length (in bytes) to a multiple of 16 bytes
    length = ALIGN16(length);

    // Clear the first output row before any values are stored into it.
    // NOTE(review): length is in bytes here while other decoders in this
    // file pass pixel counts to ZeroHighPassRow — confirm the expected units.
    ZeroHighPassRow((PIXEL *)rowptr, length);

    // Decode runs and magnitude values until the band end trailer is decoded
    for (;;)
    {
        // Read a byte from the bitstream
        byte = GetFastByte(stream);

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return when the entire band is decoded
        if (entry->value0 == BAND_END_TRAILER) {
            // Zero out the whole subband from here on
            rowptr += pitch;
            while(rowptr < maxptr) {
                ZeroHighPassRow((PIXEL *)rowptr, length);
                rowptr += pitch;
            }
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // If no magnitude value is decoded
        if (entry->value0 == 0)
        {
            column += entry->pre_skip;

            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }
        }
        // If there is only one decoded magnitude value
        else if(entry->value1 == 0)
        {
            value = entry->value0;
            column += entry->pre_skip;

            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }

            // Fill in the decoded magnitude
            // Check the column before storing the value
            assert(0 <= column && column < width);

            // Store the saturated value at the position found in the scan
            rowptr[column] = SATURATE8S(value);
            column += entry->post_skip;

            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }
        }
        // If there are two decoded magnitude values
        else
        {
            // Check the column before storing values
            assert(0 <= column && column < width);

            if(column < width-1) {
                // Both values fit on the current row
                value = entry->value0;
                rowptr[column++] = SATURATE8S(value);
                value = entry->value1;
                rowptr[column++] = SATURATE8S(value);
            }
            else {
                // The pair straddles a row boundary: first value ends this
                // row, second value starts the next row
                value = entry->value0;
                rowptr[column] = SATURATE8S(value);
                value = entry->value1;
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
                column = 0;
                rowptr[column++] = SATURATE8S(value);
            }
        }

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            // Zero out the whole subband from here on
            rowptr += pitch;
            while(rowptr < maxptr) {
                ZeroHighPassRow((PIXEL *)rowptr, length);
                rowptr += pitch;
            }
            ResetFSM(fsm);
            return true;
        }

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // If no magnitude value is decoded
        if (entry->value0 == 0)
        {
            column += entry->pre_skip;

            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }
        }
        // If there is only one decoded magnitude value
        else if (entry->value1 == 0)
        {
            value = entry->value0;
            column += entry->pre_skip;

            // The run length scan can go past the end of the row if the row ends
            // with a run of zeros and the next row begins with a run of zeros
            // Did the scan go beyond the end of the row?
            while (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }

            // Fill in the decoded magnitude
            // Check the column before storing the value
            assert(0 <= column && column < width);

            // Store the saturated value at the position found in the scan
            rowptr[column] = SATURATE8S(value);
            column += entry->post_skip;

            // Did the scan go beyond the end of the row?
            if (column >= width)
            {
                // Compute the starting column for the next row
                column -= width;

                // Advance to the next row
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
            }
        }
        // If there are two decoded magnitude values
        else
        {
            // Check the column before storing values
            assert(0 <= column && column < width);

            if(column < width-1) {
                // Both values fit on the current row
                value = entry->value0;
                rowptr[column++] = SATURATE8S(value);
                value = entry->value1;
                rowptr[column++] = SATURATE8S(value);
            }
            else {
                // The pair straddles a row boundary
                value = entry->value0;
                rowptr[column] = SATURATE8S(value);
                value = entry->value1;
                rowptr += pitch;
                if(rowptr < maxptr) ZeroHighPassRow((PIXEL *)rowptr, length);
                column = 0;
                rowptr[column++] = SATURATE8S(value);
            }
        }
    }
}
| #endif | |
// Same as DecodeBandFSM8sNoGap but outputs 16-bit data, decoded in two
// passes over the bitstream: the first pass stores the low-order decoded
// values, then the stream is realigned and a second pass ORs additional
// magnitude bits into the high byte of each coefficient (value << 8).
//
// The "NoGap" family assumes the encoder coded the width-to-pitch gap as
// zero runs, so decoded values can be written without per-row end checks.
// The pre_post_skip field packs the pre-skip in the low 12 bits and the
// post-skip in the upper bits.
//
// NOTE(review): the width and quant parameters are not used in this body —
// presumably kept for signature parity with the other band decoders; confirm
// against callers before changing.
// Returns false if image is NULL, true otherwise (errors are tolerated by
// skipping the subband when ERROR_TOLERANT is enabled).
bool DecodeBandFSM16sNoGap2Pass(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, int quant)
{
    int index, byte;
    FSMENTRY *entry;
    // NOTE(review): rowptr is declared as PIXEL* while the band-end pointer
    // is PIXEL16S* — this relies on the two pixel types having the same size.
    PIXEL *rowptr = (PIXEL *)image;
    PIXEL16S *bandendptr;       // one past the last pixel in the band
    int value;
#if ERROR_TOLERANT
    // Saved stream position so the band can be skipped on a decode error
    uint8_t *startCurrentWord = stream->lpCurrentWord;
    int32_t startWordsUsed = stream->nWordsUsed;
#endif
#if _FSMBUFFER
    // Local aligned copy of the table entry (avoids aliasing the table)
    __declspec(align(32)) FSMENTRY buffer;
#endif

    if (image == NULL) {
        return false;
    }

    // Reset the decoder
    ResetFSM(fsm);

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL16S);

    // Zero out the entire subband
    ZeroHighPassRow((PIXEL *)rowptr, pitch*height*sizeof(PIXEL16S));

    // This Huffman decoder assumes each byte is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == 2 * FSM_INDEX_SIZE);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    bandendptr = rowptr + height * pitch;

#if 0 // test for errors.
    {
        if((rand() % 10) == 1)
            stream->lpCurrentWord[rand()%50] ^= 1;
    }
#endif

    // First pass: decode runs and magnitude values until the band trailer
#if ERROR_TOLERANT
    while((intptr_t)bandendptr - (intptr_t)rowptr >= 0)
#else
    for (;;)
#endif
    {
        // Read a byte from the bitstream
#if ERROR_TOLERANT
        if(stream->nWordsUsed)
        {
            byte = GetFastByte(stream);
        }
        else
        {
            // Stream exhausted before the trailer: fall through to recovery
            break;
        }
#else
        byte = GetFastByte(stream);
#endif

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // First pass complete when the band trailer is decoded
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            goto SecondPass;
        }

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs (pre-skip in the low 12 bits)
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude
        value = entry->value0;
        rowptr[0] = value;//SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the appropriate distance (post-skip in the upper bits)
        rowptr = &rowptr[entry->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // First pass complete when the band trailer is decoded
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            goto SecondPass;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude
        value = entry->value0;
        rowptr[0] = value;//SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip >> 12];
    }

SecondPass:
    // Restart at the top of the band and realign the bitstream; the second
    // pass data follows a 4-byte segment that is skipped here
    rowptr = (PIXEL16S *)image;
    AlignBits(stream);
    AlignBitsTag(stream);
    stream->lpCurrentWord += 4;
    stream->nWordsUsed -= 4;

    // Second pass: OR the high-order magnitude bits into each coefficient
#if ERROR_TOLERANT
    while((intptr_t)bandendptr - (intptr_t)rowptr >= 0)
#else
    for (;;)
#endif
    {
        // Read a byte from the bitstream
#if ERROR_TOLERANT
        if(stream->nWordsUsed)
        {
            byte = GetFastByte(stream);
        }
        else
        {
            break;
        }
#else
        byte = GetFastByte(stream);
#endif

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Merge the first decoded magnitude into the high byte
        value = entry->value0;
        rowptr[0] |= value << 8;

        // Merge the second decoded magnitude into the high byte
        value = entry->value1;
        rowptr[1] |= value << 8;

        // Skip the appropriate distance
        rowptr = &rowptr[entry->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Merge the first decoded magnitude into the high byte
        value = entry->value0;
        rowptr[0] |= value << 8;

        // Merge the second decoded magnitude into the high byte
        value = entry->value1;
        rowptr[1] |= value << 8;

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip >> 12];
    }

#if ERROR_TOLERANT
    // A decode error occurred (stream exhausted or band overrun):
    // Reset the decoder
    ResetFSM(fsm);

    // Backup the bitstream to the beginning of the band
    stream->lpCurrentWord = startCurrentWord;
    stream->nWordsUsed = startWordsUsed;

#if 0
    AlignBitsTag(stream);
    // Read the debugging marker
    {
        TAGVALUE segment;
        do
        {
            segment = GetTagValue(stream);
        }
        while(segment.tuple.tag != CODEC_TAG_BAND_TRAILER);
        stream->lpCurrentWord -= 4;
        stream->nWordsUsed += 4;
    }
#else
    // Skip past the damaged subband so later bands can still be decoded
    SkipSubband(stream);
#endif
#endif

    return true;
}
// Same as DecodeBandFSM8sNoGap but outputs 16-bit data.
//
// Decodes a subband with the FSM Huffman decoder, one byte (two 4-bit
// chunks) at a time.  A fast inner loop reads bytes directly from the
// bitstream buffer and writes value pairs as a single 32-bit store, running
// until within 500 pixels of the band end; a slow tail loop then finishes
// the band with trailer detection and per-value stores.  The "NoGap"
// variants rely on the encoder coding the width-to-pitch gap as zero runs,
// so no per-row end-of-row checks are needed.
//
// NOTE(review): the width parameter is unused in this body; the fast loop
// does not check for the band trailer, relying on the 500-pixel margin —
// confirm that margin against the FSM table's maximum skip distances.
#if _DEBUG
bool DecodeBandFSM16sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, FILE *logfile)
#else
bool DecodeBandFSM16sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch)
#endif
{
    int index, byte;
    FSMENTRY *entry;
    FSMENTRYFAST *entryfast;        // compact entry layout used by the fast loop
    PIXEL16S *rowptr = image;       // current output position
    PIXEL16S *bandendptr;           // one past the last pixel in the band
    PIXEL16S *fastendptr;           // fast-loop limit (band end minus margin)
    int32_t value;
    uint8_t *startCurrentWord = stream->lpCurrentWord;  // saved for error recovery
    uint8_t *CurrentWord = stream->lpCurrentWord;       // raw read cursor for the fast loop
    int32_t startWordsUsed = stream->nWordsUsed;
    ptrdiff_t offset;
#if _FSMBUFFER
    // Local aligned copy of the table entry (avoids aliasing the table)
    __declspec(align(32)) FSMENTRY buffer;
#endif

#if (0 && DEBUG)
    DebugOutputBitstreamPosition(stream);
    DebugOutputBitstreamBytes(stream, 16);
#endif

    // Reset the decoder
    ResetFSM(fsm);

#if (0 && DEBUG)
    DebugOutputFSM(fsm);
#endif

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL16S);

    // Zero out the entire subband
    ZeroHighPassRow((PIXEL *)rowptr, pitch*height*sizeof(PIXEL16S));
    //memset(rowptr, 0, pitch*height*sizeof(PIXEL16S));

    // This Huffman decoder assumes each byte is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == 2 * FSM_INDEX_SIZE);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    bandendptr = rowptr + height * pitch;

#if 0 // test for errors.
    {
        if((rand() % 10) == 1)
            stream->lpCurrentWord[rand()%50] ^= 1;
    }
#endif

    // Stop the fast loop 500 pixels short of the band end so that the
    // unchecked 32-bit stores cannot overrun the output array
    fastendptr = bandendptr;
    fastendptr -= 500;

    // Fast loop: decode runs and magnitude values without trailer checks
    while(rowptr < fastendptr)
    {
        // Read a byte directly from the bitstream buffer
        byte = *CurrentWord++;

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entryfast = (FSMENTRYFAST *)GetFSMTableEntry(fsm, index);
#if (0 && DEBUG)
        //DebugOutputFSMEntryFast(fsm, index, entryfast);
        PrintFSMEntryFast(fsm, index, entryfast, logfile);
#endif

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entryfast->next_state);

        // Skip the decoded zero runs (pre-skip in the low 12 bits)
        rowptr = &rowptr[entryfast->pre_post_skip & 0xfff];

        // Store both decoded magnitudes with a single 32-bit write
        *((uint32_t *)rowptr) = entryfast->values;

        // Skip the appropriate distance (post-skip in the upper bits)
        rowptr = &rowptr[entryfast->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entryfast = (FSMENTRYFAST *)GetFSMTableEntry(fsm, index);
#if (0 && DEBUG)
        //DebugOutputFSMEntryFast(fsm, index, entryfast);
        PrintFSMEntryFast(fsm, index, entryfast, logfile);
#endif

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entryfast->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entryfast->pre_post_skip & 0xfff];

        // Store both decoded magnitudes with a single 32-bit write
        *((uint32_t *)rowptr) = entryfast->values;

        // Skip the decoded zero runs
        rowptr = &rowptr[entryfast->pre_post_skip >> 12];
    }

    // Resynchronize the bitstream state with the raw cursor used above
    offset = CurrentWord - startCurrentWord;
    stream->lpCurrentWord += offset;
    stream->nWordsUsed -= (int)offset;

    // Slow tail loop: finish the band with trailer and bounds checking
#if ERROR_TOLERANT
    while(bandendptr >= rowptr)
#else
    for (;;)
#endif
    {
#if (0 && DEBUG)
        if (!(rowptr < bandendptr)) {
            return true;
        }
#endif
#if (0 && DEBUG)
        PrintBitstreamPosition(stream, logfile);
#endif

        // Read a byte from the bitstream
#if ERROR_TOLERANT
        if(stream->nWordsUsed)
        {
            byte = GetFastByte(stream);
        }
        else
        {
            // Stream exhausted before the trailer: fall through to recovery
            break;
        }
#else
        byte = GetFastByte(stream);
#endif

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if (0 && DEBUG)
        //DebugOutputFSMEntry(fsm, index, entry);
        PrintFSMEntry(fsm, index, entry, logfile);
#endif
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude (skip the store when zero;
        // the band was pre-zeroed above)
        if ((value = entry->value0)) {
            rowptr[0] = value;//SATURATE(value);
        }

        // Write down the second decoded magnitude
        if ((value = entry->value1)) {
            rowptr[1] = value;//SATURATE(value);
        }

        // Skip the appropriate distance
        rowptr = &rowptr[entry->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if (0 && DEBUG)
        //DebugOutputFSMEntry(fsm, index, entry);
        PrintFSMEntry(fsm, index, entry, logfile);
#endif
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude
        if ((value = entry->value0)) {
            rowptr[0] = value;//SATURATE(value);
        }

        // Write down the second decoded magnitude
        if ((value = entry->value1)) {
            rowptr[1] = value;//SATURATE(value);
        }

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip >> 12];
    }

#if ERROR_TOLERANT
    // A decode error occurred (stream exhausted or band overrun):
    // Reset the decoder
    ResetFSM(fsm);

    // Backup the bitstream to the beginning of the band
    stream->lpCurrentWord = startCurrentWord;
    stream->nWordsUsed = startWordsUsed;

#if 0
    AlignBitsTag(stream);
    // Read the debugging marker
    {
        TAGVALUE segment;
        do
        {
            segment = GetTagValue(stream);
        }
        while(segment.tuple.tag != CODEC_TAG_BAND_TRAILER);
        stream->lpCurrentWord -= 4;
        stream->nWordsUsed += 4;
    }
#else
    // Skip past the damaged subband so later bands can still be decoded
    SkipSubband(stream);
#endif
#endif

    return true;
}
// Variant of DecodeBandFSM16sNoGap that restores "peak" coefficients:
// whenever a decoded magnitude exceeds the given level, the value is
// replaced by the next entry from the peaks table (divided by quant)
// instead of the decoded value.  The second value of each pair is stored
// as decoded.  Structure mirrors DecodeBandFSM16sNoGap: a fast raw-pointer
// loop (stopped 1000 pixels before the band end) followed by a slow tail
// loop with trailer detection and ERROR_TOLERANT recovery.
//
// NOTE(review): the width parameter is unused in this body; the peaks
// pointer is advanced without a bounds check — presumably the encoder
// guarantees enough peak entries; confirm against the encoder side.
bool DecodeBandFSM16sNoGapWithPeaks(FSM *fsm, BITSTREAM *stream, PIXEL16S *image, int width, int height, int pitch, PIXEL *peaks, int level, int quant)
{
    int index, byte;
    FSMENTRY *entry;
    PIXEL16S *rowptr = image;       // current output position
    PIXEL16S *bandendptr;           // one past the last pixel in the band
    PIXEL16S *fastendptr;           // fast-loop limit (band end minus margin)
    int32_t value;
    uint8_t *startCurrentWord = stream->lpCurrentWord;  // saved for error recovery
    uint8_t *CurrentWord = stream->lpCurrentWord;       // raw read cursor for the fast loop
    int32_t startWordsUsed = stream->nWordsUsed;
#if _FSMBUFFER
    // Local aligned copy of the table entry (avoids aliasing the table)
    __declspec(align(32)) FSMENTRY buffer;
#endif

    // Reset the decoder
    ResetFSM(fsm);

    //This is been called with non-prequantized FSM
    // Scale the peak threshold to the quantized domain
    if(quant>1) level /= quant;

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL16S);

    // Zero out the entire subband
    ZeroHighPassRow((PIXEL *)rowptr, pitch*height*sizeof(PIXEL16S));

    // This Huffman decoder assumes each byte is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == 2 * FSM_INDEX_SIZE);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    bandendptr = rowptr + height * pitch;

#if 0 // test for errors.
    {
        if((rand() % 10) == 1)
            stream->lpCurrentWord[rand()%50] ^= 1;
    }
#endif

    // Stop the fast loop 1000 pixels short of the band end so the unchecked
    // stores cannot overrun the output array
    fastendptr = bandendptr;
    fastendptr -= 1000;

    // Fast loop: decode runs and magnitude values without trailer checks
    while(rowptr < fastendptr)
    {
        // Read a byte directly from the bitstream buffer
        byte = *CurrentWord++;

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs (pre-skip in the low 12 bits)
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude; a value above the peak
        // threshold is replaced by the next entry from the peaks table
        value = entry->value0;
        if(abs(value) > level)
            rowptr[0] = *peaks++ / quant;
        else
            rowptr[0] = value;//SATURATE(value);
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the appropriate distance (post-skip in the upper bits)
        rowptr = &rowptr[entry->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude, with peak substitution
        value = entry->value0;
        if(abs(value) > level)
            rowptr[0] = *peaks++ / quant;
        else
            rowptr[0] = value;//SATURATE(value);
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip >> 12];
    }

    // Resynchronize the bitstream state with the raw cursor used above
    stream->lpCurrentWord += ((intptr_t)CurrentWord - (intptr_t)startCurrentWord);
    stream->nWordsUsed -= (int)(((intptr_t)CurrentWord - (intptr_t)startCurrentWord));

    // Slow tail loop: finish the band with trailer and bounds checking
#if ERROR_TOLERANT
    while(((intptr_t)bandendptr - (intptr_t)rowptr) >= 0)
#else
    for (;;)
#endif
    {
#if (0 && DEBUG)
        if (!(rowptr < bandendptr)) {
            return true;
        }
#endif

        // Read a byte from the bitstream
#if ERROR_TOLERANT
        if(stream->nWordsUsed)
        {
            byte = GetFastByte(stream);
        }
        else
        {
            // Stream exhausted before the trailer: fall through to recovery
            break;
        }
#else
        byte = GetFastByte(stream);
#endif

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude, with peak substitution
        value = entry->value0;
        if(abs(value) > level)
            rowptr[0] = *peaks++ / quant;
        else
            rowptr[0] = value;//SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the appropriate distance
        rowptr = &rowptr[entry->pre_post_skip >> 12];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip & 0xfff];

        // Write down the first decoded magnitude, with peak substitution
        value = entry->value0;
        if(abs(value) > level)
            rowptr[0] = *peaks++ / quant;
        else
            rowptr[0] = value;//SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = value;//SATURATE(value);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_post_skip >> 12];
    }

#if ERROR_TOLERANT
    // A decode error occurred (stream exhausted or band overrun):
    // Reset the decoder
    ResetFSM(fsm);

    // Backup the bitstream to the beginning of the band
    stream->lpCurrentWord = startCurrentWord;
    stream->nWordsUsed = startWordsUsed;

#if 0
    AlignBitsTag(stream);
    // Read the debugging marker
    {
        TAGVALUE segment;
        do
        {
            segment = GetTagValue(stream);
        }
        while(segment.tuple.tag != CODEC_TAG_BAND_TRAILER);
        stream->lpCurrentWord -= 4;
        stream->nWordsUsed += 4;
    }
#else
    // Skip past the damaged subband so later bands can still be decoded
    SkipSubband(stream);
#endif
#endif

    return true;
}
| // This version of DecodeBandFSM() assumes that the gap between width and pitch has been coded as | |
| // zero runs. Therefore decoded magnitude values can be written down without the need to check | |
| // if the end of a row has been reached. Hence the total number of conditionals in DecodeBandFSM | |
| // can be significantly reduced. | |
| // Decode a subband using FSM. One byte is read from the bitstream each time and decoded in two steps | |
| // Original version that does not use a separate buffer for decoding | |
| #if !_INDIVIDUAL_ENTRY | |
| #if 0 //dan20041030 not used | |
// Decode an 8-bit subband with the FSM Huffman decoder, assuming the
// width-to-pitch gap was coded as zero runs ("NoGap"), so decoded values
// are written without end-of-row checks.  Each bitstream byte is decoded
// as two 4-bit chunks; each chunk yields a pre-skip, up to two magnitude
// values, and a post-skip.  Returns true when the band-end trailer is
// decoded.
//
// NOTE(review): this function sits inside "#if 0 //dan20041030 not used"
// and is compiled out; it is retained for reference only.  bandendptr is
// computed but only used in asserts and the disabled debug check.
bool DecodeBandFSM8sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL8S *image, int width, int height, int pitch)
{
    int index, byte;
    FSMENTRY *entry;
    PIXEL8S *rowptr = image;        // current output position
    PIXEL8S *bandendptr;            // one past the last pixel in the band
    int32_t value;
#if _FSMBUFFER
    // Local aligned copy of the table entry (avoids aliasing the table)
    __declspec(align(32)) FSMENTRY buffer;
#endif

    // Convert the pitch to units of pixels
    pitch /= sizeof(PIXEL8S);

    // Zero out the entire subband
    ZeroHighPassRow((PIXEL *)rowptr, pitch*height);

    // This version of Huffman decoder assumes that one byte
    // is processed as two 4-bit chunks
    assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2);
    assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE);

    bandendptr = rowptr + height * pitch;

    // Decode runs and magnitude values until the entire band is decoded
    //while (rowptr < bandendptr)
    for (;;)
    {
#if (0 && DEBUG)
        if (!(rowptr < bandendptr)) {
            return true;
        }
#endif
        // Check that the decoder has not overrun the output array
        //assert(rowptr < bandendptr);

        // Read a byte from the bitstream
        byte = GetFastByte(stream);

        // Decode the first 4-bit chunk
        index = byte >> FSM_INDEX_SIZE;

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

#if 1
        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }
#endif

        // Set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_skip];

        // Write down the first decoded magnitude
        value = entry->value0;
        rowptr[0] = SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = SATURATE(value);

        // Skip the appropriate distance
        rowptr = &rowptr[entry->post_skip];

        // decode the second 4-bit chunk
        index = byte & ((1<<FSM_INDEX_SIZE)-1);

        // Index into the lookup table at that state
        entry = GetFSMTableEntry(fsm, index);
#if _FSMBUFFER
        memcpy(&buffer, entry, sizeof(FSMENTRY));
        entry = &buffer;
#endif

#if 1
        // Return if the subband is decoded completely
        if (entry->value0 == BAND_END_TRAILER)
        {
            assert(rowptr <= bandendptr);
            ResetFSM(fsm);
            return true;
        }
#endif

        // set the pointer to the next state
        UpdateFSM(fsm, (int)entry->next_state);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->pre_skip];

        // Write down the first decoded magnitude
        value = entry->value0;
        rowptr[0] = SATURATE(value);

        // Write down the second decoded magnitude
        value = entry->value1;
        rowptr[1] = SATURATE(value);

        // Skip the decoded zero runs
        rowptr = &rowptr[entry->post_skip];
    }
}
| #endif | |
| #elif _SINGLE_FSM_TABLE | |
| bool DecodeBandFSM8sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL8S *image, int width, int height, int pitch) | |
| { | |
| int index, byte, i; | |
| FSMENTRY *entry,*firstentry = fsm->table->firstentry; | |
| PIXEL8S *rowptr = image; | |
| PIXEL8S *bandendptr; | |
| int32_t value; | |
| pitch /= sizeof(PIXEL8S); | |
| // Zero out the entire subband | |
| ZeroHighPassRow((PIXEL *)rowptr, pitch*height); | |
| // The Huffman decoder assumes each byte is processed as two 4-bit chunks | |
| assert(BITSTREAM_WORD_SIZE == 2 * FSM_INDEX_SIZE); | |
| assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE); | |
| // Decode runs and magnitude values until the entire band is decoded | |
| for (;;) | |
| { | |
| // Check that the decoder has not overrun the output array | |
| //assert(rowptr < bandendptr); | |
| // Read a byte from the bitstream | |
| byte = GetFastByte(stream); | |
| // Decode the first 4-bit chunk | |
| index = byte >> FSM_INDEX_SIZE; | |
| // Index into the lookup table at that state | |
| i = (fsm->next_state_index << FSM_INDEX_SIZE) | index;//DAN | |
| entry = firstentry+i; //DAN | |
| // Return if the subband is decoded completely | |
| if(entry->value0 == BAND_END_TRAILER) | |
| { | |
| assert(rowptr <= bandendptr); | |
| ResetFSMIndividual(fsm); | |
| return true; | |
| } | |
| // set the pointer to the next state | |
| UpdateFSMIndividual(fsm, (entry->next_state)); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->pre_skip]; | |
| // Write down the first decoded magnitude | |
| value = entry->value0; | |
| rowptr[0] = SATURATE(value); | |
| // Write down the second decoded magnitude | |
| value = entry->value1; | |
| rowptr[1] = SATURATE(value); | |
| // Skip the appropriate distance | |
| rowptr = &rowptr[entry->post_skip]; | |
| // decode the second 4-bit chunk | |
| index = byte & ((1<<FSM_INDEX_SIZE)-1); | |
| // Index into the lookup table at that state | |
| i = (fsm->next_state_index << FSM_INDEX_SIZE) | index;//DAN | |
| entry = firstentry+i; //DAN | |
| // Return if the subband is decoded completely | |
| if(entry->value0 == BAND_END_TRAILER) | |
| { | |
| assert(rowptr <= bandendptr); | |
| ResetFSMIndividual(fsm); | |
| return true; | |
| } | |
| // set the pointer to the next state | |
| UpdateFSMIndividual(fsm, (entry->next_state)); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->pre_skip]; | |
| // Write down the first decoded magnitude | |
| value = entry->value0; | |
| rowptr[0] = SATURATE(value); | |
| // Write down the second decoded magnitude | |
| value = entry->value1; | |
| rowptr[1] = SATURATE(value); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->post_skip]; | |
| } | |
| } | |
| #else | |
| bool DecodeBandFSM8sNoGap(FSM *fsm, BITSTREAM *stream, PIXEL8S *image, int width, int height, int pitch) | |
| { | |
| int index, byte; | |
| FSMENTRY *entry; | |
| PIXEL8S *rowptr = image; | |
| PIXEL8S *bandendptr; | |
| int32_t value; | |
| #if 1 | |
| __declspec(align(4)) FSMENTRY buffer; | |
| #endif | |
| pitch /= sizeof(PIXEL8S); | |
| // zero out the entire subband | |
| ZeroHighPassRow((PIXEL *)rowptr, pitch*height); | |
| // The Huffman decoder assumes each byte is processed as two 4-bit chunks | |
| assert(BITSTREAM_WORD_SIZE == 2 * FSM_INDEX_SIZE); | |
| assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE); | |
| bandendptr = rowptr + height * pitch; | |
| // Decode runs and magnitude values until the entire band is decoded | |
| for (;;) | |
| { | |
| #if (0 && DEBUG) | |
| if (!(rowptr < bandendptr)) { | |
| return true; | |
| } | |
| #endif | |
| // Read a byte from the bitstream | |
| byte = GetFastByte(stream); | |
| // Decode the first 4-bit chunk | |
| index = byte >> FSM_INDEX_SIZE; | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntryIndividual(fsm, index); | |
| // Return if the subband is decoded completely | |
| if(entry == NULL) | |
| { | |
| assert(rowptr <= bandendptr); | |
| ResetFSMIndividual(fsm); | |
| return true; | |
| } | |
| // Set the pointer to the next state | |
| UpdateFSMIndividual(fsm, (entry->next_state)); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->pre_skip]; | |
| // Write down the first decoded magnitude | |
| value = entry->value0; | |
| rowptr[0] = SATURATE(value); | |
| // Write down the second decoded magnitude | |
| value = entry->value1; | |
| rowptr[1] = SATURATE(value); | |
| // Skip the appropriate distance | |
| rowptr = &rowptr[entry->post_skip]; | |
| // decode the second 4-bit chunk | |
| index = byte & ((1<<FSM_INDEX_SIZE)-1); | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntryIndividual(fsm, index); | |
| // Return if the subband is decoded completely | |
| if (entry == NULL) | |
| { | |
| assert(rowptr <= bandendptr); | |
| ResetFSMIndividual(fsm); | |
| return true; | |
| } | |
| // Set the pointer to the next state | |
| UpdateFSMIndividual(fsm, (entry->next_state)); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->pre_skip]; | |
| // Write down the first decoded magnitude | |
| value = entry->value0; | |
| rowptr[0] = SATURATE(value); | |
| // Write down the second decoded magnitude | |
| value = entry->value1; | |
| rowptr[1] = SATURATE(value); | |
| // Skip the decoded zero runs | |
| rowptr = &rowptr[entry->post_skip]; | |
| } | |
| } | |
| #endif | |
| // Decode the highpass band coefficients but do not write them out - used in SIF mode | |
| bool SkipBandFSM(FSM *fsm, BITSTREAM *stream, PIXEL8S *image, int width, int height, int pitch) | |
| { | |
| int index, byte; | |
| FSMENTRY *entry; | |
| pitch /= sizeof(PIXEL8S); | |
| // The Huffman decoder assumes each byte is processed as two 4-bit chunks | |
| assert(BITSTREAM_WORD_SIZE == FSM_INDEX_SIZE*2); | |
| assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE); | |
| // Decode runs and magnitude values until the entire band is decoded | |
| for (;;) | |
| { | |
| // Read a byte from the bitstream | |
| byte = GetFastByte(stream); | |
| // Decode the first 4-bit chunk | |
| index = byte >> FSM_INDEX_SIZE; | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntry(fsm, index); | |
| // Return if the subband is decoded completely | |
| if (entry->value0 == BAND_END_TRAILER) { | |
| ResetFSM(fsm); | |
| return true; | |
| } | |
| // set the pointer to the next state | |
| UpdateFSM(fsm, (int)entry->next_state); | |
| // decode the second 4-bit chunk | |
| index = byte & ((1<<FSM_INDEX_SIZE)-1); | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntry(fsm, index); | |
| // Return if the subband is decoded completely | |
| if (entry->value0 == BAND_END_TRAILER) { | |
| ResetFSM(fsm); | |
| return true; | |
| } | |
| // set the pointer to the next state | |
| UpdateFSM(fsm, (int)entry->next_state); | |
| } | |
| } | |
| #if _TIMING | |
| extern TIMER tk_fastruns; | |
| #endif | |
| #if 0 //dan20041030 not used | |
| // New version of coefficient runs decoder that uses a finite state machine with a scaling factor | |
// Decode one subband of 8-bit signed coefficients using the finite state
// machine decoder.  NOTE(review): this routine is compiled out (#if 0,
// marked "not used" in 2004) and is retained for reference only.
bool DecodeFastRunsFSM8s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet,
                         int band_index, int width, int height)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
	FILE *logfile = decoder->logfile;
	int result;

	// Get the pointer to the finite state machine
	FSM *fsm = &decoder->fsm[decoder->codec.active_codebook]; //DAN20041026

	// All rows are treated as one long row that covers the entire band
	int size = fsm->table.num_states;
	PIXEL *rowptr;
	int row = 0;
	int pitch;
	// NOTE(review): wavelet is dereferenced here before the NULL check below
	int pixel_type = wavelet->pixel_type[band_index];

	decoder->codec.active_codebook = 0; // reset CODEC state

	// Must have a valid wavelet
	assert(wavelet != NULL);
	if (wavelet == NULL) return false;

	//Must have a valid FSM
	assert(fsm != NULL);
	if(fsm == NULL) return false;

	// The state machine tables must have been initialized
	assert(size > 0);
	if (size == 0) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	// Check if the band is intended for 8-bit pixels
	assert(pixel_type == PIXEL_TYPE_8S);

	START(tk_fastruns);

	rowptr = (PIXEL *)wavelet->band[band_index];
	pitch = wavelet->pitch8s; // Use the 8-bit pitch
	//pitch = wavelet->pitch;

	// The finite state machine does not support a marker at the end of rows
#if RUNS_ROWEND_MARKER
	assert(0);
#endif

	// Get one byte from the bitstream and decode 4 bits at a time
	result = DecodeBandFSM8sNoGap(fsm, stream, (PIXEL8S *)rowptr, width, height, pitch);
	assert(result == true);
	if (result != true) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

#if (0 && DEBUG && _WIN32)
	_CrtCheckMemory();
#endif

#if (0 && DEBUG)
	if (logfile)
		DumpBand("Band", wavelet, band_index, NULL, logfile);
#endif

#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "DecodeFastRunsFSM8s, band index: %d\n", band_index);
		DumpWaveletRow(wavelet, band_index, 0, logfile);
	}
#endif

// NOTE(review): this label is never targeted by any goto in the function
// (unused-label warning); likely left over from an earlier cleanup path
end:
	STOP(tk_fastruns);

	return true;
}
| #endif | |
| #if _DEQUANTIZE_IN_FSM | |
| void ReQuantFSM(FSM *fsm, int quant) | |
| { | |
| int count = 0; | |
| int i, j; | |
| short *restore = &fsm->restoreFSM[0]; | |
| #if !_INDIVIDUAL_ENTRY | |
| for (i = 0; i < fsm->table.num_states; i++) | |
| { | |
| FSMENTRY *entry = fsm->table.entries[i]; | |
| for (j = 0; j < (1 << FSM_INDEX_SIZE); j++) | |
| { | |
| entry[j].value0 = restore[count++]; | |
| entry[j].value1 = restore[count++]; | |
| } | |
| } | |
| #else | |
| for (i = 0; i < (fsm->table.num_states << FSM_INDEX_SIZE); i++) | |
| { | |
| FSMENTRY *entry = fsm_table.entries_ind[i]; | |
| if(entry) | |
| { | |
| entry->value0 = restore[count++]; | |
| entry->value1 = restore[count++]; | |
| } | |
| } | |
| #endif | |
| } | |
// Fold the band quantization factor into the FSM entry tables by scaling
// the stored magnitude values in place, so dequantization happens for free
// while decoding.  The first time this is called the unscaled values are
// saved into fsm->restoreFSM so a later call with a different quant can
// restore them (via ReQuantFSM) before rescaling.
void DeQuantFSM(FSM *fsm, int quant)
{
	int i, j;

	// If the tables are currently scaled by some other quant, restore the
	// original values first
	if(fsm->LastQuant > 1 && fsm->LastQuant != quant)
	{
		ReQuantFSM(fsm, fsm->LastQuant);
	}
	else if(fsm->LastQuant == quant)
	{
		// Already scaled by this quant value - nothing to do
		return;
	}

	// On first use, snapshot the unscaled magnitudes so they can be restored
	if(fsm->InitizedRestore == 0)
	{
		short *restore = &fsm->restoreFSM[0];
		int count = 0;
#if !_INDIVIDUAL_ENTRY
		// Contiguous table layout: one array of entries per state
		for (i = 0; i < fsm->table.num_states; i++)
		{
			FSMENTRY *entry = fsm->table.entries[i];
			for (j = 0; j < (1 << FSM_INDEX_SIZE); j++)
			{
				restore[count++] = entry[j].value0;
				restore[count++] = entry[j].value1;
			}
		}
#else
		// Individual-entry layout: flat pointer array, NULL slots skipped
		// (ReQuantFSM must skip the same slots so the counts line up)
		for (i = 0; i < (fsm->table.num_states << FSM_INDEX_SIZE); i++)
		{
			FSMENTRY *entry = fsm->table.entries_ind[i];
			if(entry)
			{
				restore[count++] = entry->value0;
				restore[count++] = entry->value1;
			}
		}
#endif
		fsm->InitizedRestore = 1;
	}

	// Scale the magnitudes by the quantization factor.  Values at or above
	// 0x7ff0 are control codes (band end trailer etc.) and value0 is left
	// unscaled for them; value1 is scaled unconditionally - presumably it
	// never holds a control code (NOTE(review): confirm against the
	// codebook generator).
#if !_INDIVIDUAL_ENTRY
	for (i = 0; i < fsm->table.num_states; i++)
	{
		FSMENTRY *entry = fsm->table.entries[i];
		for (j = 0; j < (1 << FSM_INDEX_SIZE); j++)
		{
			if(entry[j].value0 < 0x7ff0) // band end trailer
				entry[j].value0 *= quant;
			entry[j].value1 *= quant;
		}
	}
#else
	for (i = 0; i < (fsm->table.num_states << FSM_INDEX_SIZE); i++)
	{
		FSMENTRY *entry = fsm->table.entries_ind[i];
		if(entry)
		{
			if(entry->value0 < 0x7ff0) // band end trailer etc
				entry->value0 *= quant;
			entry->value1 *= quant;
		}
	}
#endif

	// Remember the scale currently applied to the tables
	fsm->LastQuant = quant;
}
| #endif // _DEQUANTIZE_IN_FSM | |
| // New version of coefficient runs decoder that uses a finite state machine with a scaling factor | |
| //dan 7-11-03 | |
// Decode one subband of 16-bit signed highpass coefficients using the
// finite state machine decoder.  Dequantization is folded into the FSM
// tables (see DeQuantFSM), so decoded values come out already scaled.
// When threading is enabled, the band parameters and a snapshot of the
// bitstream are queued for an entropy worker thread and this routine only
// advances the main bitstream past the encoded band.
//
// Returns true on success; on failure decoder->error is set to
// CODEC_ERROR_RUN_DECODE and false is returned.
bool DecodeFastRunsFSM16s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet,
                          int band_index, int width, int height, int threading)
{
	//CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	int result = true;
	int quant = wavelet->quantization[band_index];
	int active_codebook = decoder->codec.active_codebook;

	// Get the pointer to the finite state machine
	FSM *fsm = &decoder->fsm[active_codebook];
	int size;
	PIXEL *rowptr;
	//int row = 0;
	int pitch;
	CODEC_STATE *codec = &decoder->codec;
	//int channel = codec->channel;
	//int subband = codec->band.subband;
	//int num_subbands = codec->num_subbands;
	//int pixel_type = wavelet->pixel_type[band_index];
	int difference_coding = decoder->codec.difference_coding;
	//int localquant = 1;
	// Companding peaks table level (0 means the peaks table is not used)
	int peaklevel = 0;
	//int peaksize = 0;
	PIXEL *peakbase = NULL;

#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "Subband: %d, active_codebook: %d, difference_coding: %d\n",
			subband, decoder->codec.active_codebook, difference_coding);
	}
#endif

	// Reset the per-band codec state now that it has been captured above
	decoder->codec.active_codebook = 0; // reset CODEC state
	decoder->codec.difference_coding = 0; //reset state for next subband

	// Must have a valid wavelet
	assert(wavelet != NULL);
	if (wavelet == NULL) return false;

	//Must have a valid FSM
	assert(fsm != NULL);
	if(fsm == NULL) return false;

	// All rows are treated as one long row that covers the entire band
	size = fsm->table.num_states;
	assert(size > 0);
	if (size == 0) {
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	// Check that the band holds 16-bit signed pixels
	assert(wavelet->pixel_type[band_index] == PIXEL_TYPE_16S);

	START(tk_fastruns);

	rowptr = (PIXEL *)wavelet->band[band_index];
	//pitch = wavelet->pitch8s; // Use the 8-bit pitch
	pitch = wavelet->pitch;

	peaklevel = codec->peak_table.level;
	peakbase = codec->peak_table.base;

#if _THREADED
	// Only use the threaded path when the pool has more than one worker
	threading = decoder->entropy_worker_new.pool.thread_count > 1 ? threading : 0;
	if(threading)
	{
		decoder->entropy_worker_new.threads_used = 1;
		{
			//int start = stream->nWordsUsed;
			int end;
			struct entropy_data_new *data;
			int next_queue_num = decoder->entropy_worker_new.next_queue_num++;

			// Queue a snapshot of the bitstream and the band parameters for
			// the entropy worker thread
			data = &decoder->entropy_worker_new.entropy_data[next_queue_num];
			memcpy(&data->stream,stream, sizeof(BITSTREAM));
			data->rowptr = rowptr;
			data->width = width;
			data->height = height;
			data->pitch = pitch;
			data->peaks = peakbase;
			data->level = peaklevel;
			data->quant = quant;
			data->wavelet = wavelet;
			data->band_index = band_index;
			data->active_codebook = active_codebook;
			data->difference_coding = difference_coding;

			// Start only a particular threadid
			if(next_queue_num == 0)
			{
				ThreadPoolSetWorkCount(&decoder->entropy_worker_new.pool, 1);
#if _DELAYED_THREAD_START==0
				ThreadPoolSendMessage(&decoder->entropy_worker_new.pool, THREAD_MESSAGE_START);
#endif
			}
			else
			{ // Set the work count to the number of rows to process
				ThreadPoolAddWorkCount(&decoder->entropy_worker_new.pool, 1);
			}

			// Advance the main thread's stream past this band so tag parsing
			// can continue while the worker decodes the queued copy
			{
				// NOTE(review): this appears to re-read an optional
				// SUBBAND_SIZE tag/value pair located a few bytes behind the
				// current position - confirm against the encoder's chunk layout
				unsigned short tag = *(stream->lpCurrentWord-8) << 8;
				if(tag == (unsigned short)OPTIONALTAG(CODEC_TAG_SUBBAND_SIZE))
				{
					int chunksize;
					int value = *(stream->lpCurrentWord-6) << 8;
					value |= *(stream->lpCurrentWord-5);
					tag |= *(stream->lpCurrentWord-7);
					tag = NEG(tag);

					// Reassemble the chunk size and convert it to bytes
					chunksize = value;
					chunksize &= 0xffff;
					chunksize += ((tag&0xff)<<16);
					chunksize *= 4;
					chunksize -= 8;

					{
						// Sanity check: the chunk should end at the band-end
						// marker; otherwise fall back to a linear search
						uint32_t *ptr = (uint32_t *)stream->lpCurrentWord;
						ptr += (chunksize>>2);
						if(*ptr != 0x00003800) // bandend
						{
							goto continuesearch;
						}
					}

					stream->lpCurrentWord += chunksize;
					stream->nWordsUsed -= chunksize;
					end = stream->nWordsUsed;
				}
				else
				{
continuesearch:
					// No (or inconsistent) size field: scan forward one
					// 32-bit word at a time for the band-end marker
					while(*((uint32_t *)stream->lpCurrentWord) != 0x00003800) // bandend
					{
						stream->lpCurrentWord += 4;
						stream->nWordsUsed -= 4;
					}
					// NOTE(review): 'end' is assigned but never used
					end = stream->nWordsUsed;
				}
			}
		}
	}
	else
#endif // _THREADED
	{
		// Non-threaded path: decode the band in place on this thread.
		// Fold the quantization factor into the FSM tables first.
		DeQuantFSM(fsm, quant);

		if (peaklevel)
		{
			result = DecodeBandFSM16sNoGapWithPeaks(fsm, stream, (PIXEL16S *)rowptr, width, height, pitch, peakbase, peaklevel, 1);
		}
		else
		{
#if _DEBUG
			result = DecodeBandFSM16sNoGap(fsm, stream, (PIXEL16S *)rowptr, width, height, pitch, logfile);
#else
			result = DecodeBandFSM16sNoGap(fsm, stream, (PIXEL16S *)rowptr, width, height, pitch);
#endif
		}

		if(difference_coding)
		{
			// Undo horizontal difference coding: each coefficient was
			// encoded as a delta from its left neighbor in the same row
			int x,y;
			PIXEL *line = rowptr;
			for(y=0;y<height;y++)
			{
				for(x=1;x<width;x++)
				{
					line[x] += line[x-1];
				}
				// pitch/2: pitch is in bytes and rows hold 16-bit pixels
				// (assumes sizeof(PIXEL) == 2 - consistent with the /2)
				line += pitch/2;
			}
		}

		if (result)
		{
			// Call thread safe routine to update the band valid flags
			UpdateWaveletBandValidFlags(decoder, wavelet, band_index);
		}
	}

	assert(result == true);
	if (result != true) {
		// NOTE(review): returns without STOP(tk_fastruns) after START - the
		// timer is left running on this error path; confirm if intentional
		decoder->error = CODEC_ERROR_RUN_DECODE;
		return false;
	}

	//end:
	STOP(tk_fastruns);

	return true;
}
| bool SkipFastRunsFSM(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, | |
| int band_index, int width, int height) | |
| { | |
| //CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| int result; | |
| // Get the pointer to the finite state machine | |
| FSM *fsm = &decoder->fsm[decoder->codec.active_codebook]; //DAN20041026 | |
| // All rows are treated as one long row that covers the entire band | |
| int size = fsm->table.num_states; | |
| PIXEL *rowptr; | |
| //int row = 0; | |
| int pitch; | |
| //int pixel_type = wavelet->pixel_type[band_index]; | |
| decoder->codec.active_codebook = 0; // reset CODEC state | |
| // Must have a valid wavelet | |
| assert(wavelet != NULL); | |
| if (wavelet == NULL) return false; | |
| //Must have a valid FSM | |
| assert(fsm != NULL); | |
| if(fsm == NULL) return false; | |
| assert(size > 0); | |
| if (size == 0) { | |
| decoder->error = CODEC_ERROR_RUN_DECODE; | |
| return false; | |
| } | |
| // Check if the band is 8bit/pixel | |
| assert(wavelet->pixel_type[band_index] == PIXEL_TYPE_8S); | |
| START(tk_fastruns); | |
| rowptr = (PIXEL *)wavelet->band[band_index]; | |
| pitch = wavelet->pitch8s; // Use the 8-bit pitch | |
| // The finite state machine does not support a marker at the end of rows | |
| #if RUNS_ROWEND_MARKER | |
| assert(0); | |
| #endif | |
| #if 1 // Get one byte from the bitstream and decode 4 bits at a time | |
| result = SkipBandFSM(fsm, stream, (PIXEL8S *)rowptr, width, height, pitch); | |
| assert(result == true); | |
| if (result != true) { | |
| decoder->error = CODEC_ERROR_RUN_DECODE; | |
| return false; | |
| } | |
| #endif | |
| #if (0 && DEBUG && _WIN32) | |
| _CrtCheckMemory(); | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) | |
| DumpBand("Band", wavelet, band_index, NULL, logfile); | |
| #endif | |
| //end: | |
| STOP(tk_fastruns); | |
| return true; | |
| } | |
| // The third version is also based on the finite state machine decoder with | |
| // gaps between rows encoded as zero runs, but dequantization is performed as | |
| // the highpass values are read from the bitstream and placed into a row buffer. | |
| // The highpass values are not written into the wavelet highpass band. | |
| // Eventually this routine will be merged into the routine DecodeTemporalBand8s | |
| // since this routine contains code specific to the inverse temporal transform | |
| // and DecodeTemporalBand8s has become a shell. | |
| #if 0 | |
| bool DecodeBandRunsFSM8s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, | |
| int band_index, int width, int height, | |
| IMAGE *frame0, IMAGE *frame1) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| FILE *logfile = decoder->logfile; | |
| int result; | |
| // Get the pointer to the finite state machine | |
| FSM *fsm = &decoder->fsm; | |
| // All rows are treated as one long row that covers the entire band | |
| int size = fsm->table.num_states; | |
| PIXEL *lowpass = wavelet->band[0]; | |
| int lowpass_pitch = wavelet->pitch; | |
| //PIXEL8S *rowptr; | |
| int row = 0; | |
| int pitch; | |
| int row_width; // Width of the encoded row of highpass coefficients | |
| PIXEL *even = frame0->band[0]; | |
| PIXEL *odd = frame1->band[0]; | |
| int even_pitch = frame0->pitch; | |
| int odd_pitch = frame1->pitch; | |
| int pixel_type = wavelet->pixel_type[band_index]; | |
| int quantization = wavelet->quantization[band_index]; | |
| PIXEL *buffer; | |
| size_t buffer_size; | |
| int index, byte; | |
| FSMENTRY *entry; | |
| int column = 0; | |
| int32_t value; | |
| int buffer_row_size; | |
| PIXEL *highpass; | |
| // Check that the wavelet into which the band will be decoded is valid | |
| assert(wavelet != NULL); | |
| if (wavelet == NULL) return false; | |
| // Check that the finite state machine is valid | |
| assert(fsm != NULL); | |
| if (fsm == NULL) return false; | |
| assert(size > 0); | |
| if (size == 0) { | |
| decoder->error = CODEC_ERROR_RUN_DECODE; | |
| return false; | |
| } | |
| // Check that the band was encoded using 8-bit signed coefficients | |
| assert(pixel_type == PIXEL_TYPE_8S); | |
| pitch = wavelet->pitch8s; // Use the pitch for 8-bit packed rows | |
| // Get the buffer for storing one row of dequantized highpass coefficients | |
| buffer = (PIXEL *)decoder->buffer; | |
| buffer_size = decoder->buffer_size; | |
| // The finite state machine does not support a marker at the end of each row | |
| assert(RUNS_ROWEND_MARKER == 0); | |
| /***** Start of code included from DecodeBandFSM8s() *****/ | |
| // Check that one byte can be processes as two 4-bit nibbles | |
| assert(BITSTREAM_WORD_SIZE == (2 * FSM_INDEX_SIZE)); | |
| // Check that the bitstream buffer is empty | |
| assert(stream->nBitsFree == BITSTREAM_BUFFER_SIZE); | |
| // Convert the pitch to units of pixels | |
| pitch /= sizeof(PIXEL8S); | |
| buffer_row_size = pitch * sizeof(PIXEL); | |
| lowpass_pitch /= sizeof(PIXEL); | |
| even_pitch /= sizeof(PIXEL); | |
| odd_pitch /= sizeof(PIXEL); | |
| // Compute the address of the row after the last row in the band | |
| //maxptr = rowptr + height * pitch; | |
| // Round up the row length (in bytes) to a multiple of 16 bytes | |
| //row_size = ALIGN16(row_size); | |
| // Check that the buffer is large enough to hold one row | |
| //assert(buffer_size >= row_size); | |
| assert(buffer_size >= buffer_row_size); | |
| // Use the buffer for the row or highpass coefficients | |
| highpass = buffer; | |
| #if 1 | |
| // The row spans the allocated width (pitch) of the band in no gap mode | |
| row_width = pitch; | |
| #else | |
| // For debugging | |
| row_width = wavelet->encoded_pitch/sizeof(PIXEL8S); | |
| #endif | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| // Decode zero runs and magnitude values (with appended sign bit) | |
| // until the marker for the band end trailer has been decoded | |
| for (;;) | |
| { | |
| // Read a byte from the bitstream | |
| byte = GetFastByte(stream); | |
| /***** Decode the first 4-bit nibble *****/ | |
| // Decode the first 4-bit nibble | |
| index = byte >> FSM_INDEX_SIZE; | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntry(fsm, index); | |
| // Return when the entire band is decoded | |
| if (entry->value0 == BAND_END_TRAILER) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Process the rest of the subband | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| while (++row < height) | |
| { | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| } | |
| ResetFSM(fsm); | |
| return true; | |
| } | |
| // set the pointer to the next state | |
| UpdateFSM(fsm, (int)entry->next_state); | |
| // If no magnitude value is decoded | |
| if (entry->value0 == 0) | |
| { | |
| column += entry->pre_skip; | |
| // The run length scan can go past the end of the row if the row ends | |
| // with a run of zeros and the next row begins with a run of zeros | |
| // Did the scan go beyond the end of the row? | |
| while (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| } | |
| // If there is only one decoded magnitude value | |
| else if (entry->value1 == 0) | |
| { | |
| value = entry->value0; | |
| column += entry->pre_skip; | |
| // The run length scan can go past the end of the row if the row ends | |
| // with a run of zeros and the next row begins with a run of zeros | |
| // Did the scan go beyond the end of the row? | |
| while (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| // Fill in the decoded magnitude | |
| // Check the column before storing the value | |
| assert(0 <= column && column < row_width); | |
| // Dequantize the value and store it in the highpass row buffer | |
| highpass[column] = quantization * value; | |
| column += entry->post_skip; | |
| // Did the scan go beyond the end of the row? | |
| if (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| } | |
| // If there are two decoded magnitude values | |
| else | |
| { | |
| // Check the column before storing values | |
| assert(0 <= column && column < row_width); | |
| if (column < (row_width - 1)) { | |
| // Store both values in the current row | |
| highpass[column++] = quantization * entry->value0; | |
| highpass[column++] = quantization * entry->value1; | |
| } | |
| else { | |
| value = entry->value0; | |
| highpass[column] = quantization * value; | |
| value = entry->value1; | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| column = 0; | |
| highpass[column++] = quantization * value; | |
| } | |
| } | |
| /***** Decode the second 4-bit nibble *****/ | |
| // Decode the second 4-bit nibble | |
| index = byte & FSM_INDEX_MASK; | |
| // Index into the lookup table at that state | |
| entry = GetFSMTableEntry(fsm, index); | |
| // Return if the subband is decoded completely | |
| if (entry->value0 == BAND_END_TRAILER) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Process the rest of the subband | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| while (++row < height) | |
| { | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| } | |
| ResetFSM(fsm); | |
| return true; | |
| } | |
| // Set the pointer to the next state | |
| UpdateFSM(fsm, (int)entry->next_state); | |
| // If no magnitude value is decoded | |
| if (entry->value0 == 0) | |
| { | |
| column += entry->pre_skip; | |
| // The run length scan can go past the end of the row if the row ends | |
| // with a run of zeros and the next row begins with a run of zeros | |
| // Did the scan go beyond the end of the row? | |
| while (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| } | |
| // If there is only one decoded magnitude value | |
| else if (entry->value1 == 0) | |
| { | |
| value = entry->value0; | |
| column += entry->pre_skip; | |
| // The run length scan can go past the end of the row if the row ends | |
| // with a run of zeros and the next row begins with a run of zeros | |
| // Did the scan go beyond the end of the row? | |
| while (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| // Fill in the decoded magnitude | |
| // Check the column before storing the value | |
| //assert(index < width); | |
| assert(0 <= column && column < row_width); | |
| highpass[column] = quantization * value; | |
| column += entry->post_skip; | |
| // Did the scan go beyond the end of the row? | |
| if (column >= row_width) | |
| { | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Compute the starting column for the next row | |
| column -= row_width; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| } | |
| } | |
| // If there are two decoded magnitude values | |
| else | |
| { | |
| // Check the column before storing values | |
| assert(0 <= column && column < row_width); | |
| if (column < (row_width - 1)) { | |
| // Store both highpass values in the current row | |
| highpass[column++] = quantization * entry->value0; | |
| highpass[column++] = quantization * entry->value1; | |
| } | |
| else { | |
| highpass[column] = quantization * entry->value0; | |
| value = entry->value1; | |
| // Dequantize the highpass coefficients | |
| //DequantizeBandRow(rowptr, width, quantization, highpass); | |
| // Apply the inverse temporal transform to the current row | |
| InvertTemporalRow16s(lowpass, highpass, even, odd, width); | |
| // Advance to the next lowpass input row | |
| lowpass += lowpass_pitch; | |
| // Advance to the next even and odd output rows | |
| even += even_pitch; | |
| odd += odd_pitch; | |
| // Advance to the next row | |
| row++; | |
| // Clear the highpass buffer before decoding the non-zero coefficients | |
| ZeroHighPassRow(highpass, buffer_row_size); | |
| column = 0; | |
| highpass[column++] = quantization * value; | |
| } | |
| } | |
| } | |
| /***** End of the code included from DecodeBandFSM8s() *****/ | |
| #if 0 | |
| assert(result == true); | |
| if (result != true) { | |
| decoder->error = CODEC_ERROR_RUN_DECODE; | |
| return false; | |
| } | |
| #endif | |
| #if (0 && DEBUG && _WIN32) | |
| _CrtCheckMemory(); | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) | |
| DumpBand("Band", wavelet, band_index, NULL, logfile); | |
| #endif | |
| #if 0 | |
| end: | |
| return true; | |
| #endif | |
| } | |
| #endif | |
| /***** End of the code for the finite state machine decoder *****/ | |
| #if 1 | |
| // The second version applies the horizontal inverse filters row by row, so the | |
| // memory access pattern is more efficient. The lowpass and highpass temporal | |
| // coefficients for each row are inverted and packed into the output in one pass. | |
| // Apply the inverse horizontal-temporal transform and pack the output into a buffer | |
| void TransformInverseFrameToYUV(TRANSFORM *transform[], int frame_index, int num_channels, | |
| uint8_t *output, int output_pitch, FRAME_INFO *frame, | |
| const SCRATCH *scratch, int chroma_offset, int precision) | |
| { | |
| // Pointers to the rows in the horizontal wavelet for each channel | |
| PIXEL *horizontal_lowlow[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_lowhigh[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_highlow[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_highhigh[TRANSFORM_MAX_CHANNELS]; | |
| // Horizontal wavelet band width and pitch | |
| int horizontal_width[TRANSFORM_MAX_CHANNELS]; | |
| int horizontal_pitch[TRANSFORM_MAX_CHANNELS]; | |
| int horizontal_pitch8s[TRANSFORM_MAX_CHANNELS]; | |
| // Quantization factors | |
| int lowlow_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int lowhigh_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int highlow_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int highhigh_quantization[TRANSFORM_MAX_CHANNELS]; | |
| // Pointers to the rows in the temporal wavelet for each channel | |
| PIXEL *temporal_lowpass[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *temporal_highpass[TRANSFORM_MAX_CHANNELS]; | |
| // Push the scratch space state to allocate a new section | |
| char *buffer = scratch->free_ptr; | |
| #if DEBUG | |
| size_t buffer_size = scratch->free_size; | |
| #endif | |
| // Dimensions of the reconstructed frame | |
| int frame_width = frame->width; | |
| int frame_height = frame->height; | |
| int half_height = frame_height / 2; | |
| size_t temporal_row_size = frame_width * sizeof(PIXEL); | |
| int field_pitch = 2 * output_pitch; | |
| int output_width; | |
| int channel; | |
| int row; | |
| // Round up the temporal row size to an integral number of cache lines | |
| temporal_row_size = ALIGN(temporal_row_size, _CACHE_LINE_SIZE); | |
| // Check that the buffer starts on a cache line boundary | |
| assert(ISALIGNED(buffer, _CACHE_LINE_SIZE)); | |
| // Check that the number of channels is reasonable | |
| assert(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS); | |
| // Check that the buffer is large enough | |
| #if DEBUG | |
| assert((2 * num_channels * temporal_row_size) <= buffer_size); | |
| #endif | |
| // Allocate buffers for a single row of lowpass and highpass temporal coefficients | |
| // and initialize the arrays of row pointers into the horizontal transform bands | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| IMAGE *wavelet = transform[channel]->wavelet[frame_index]; | |
| #if (0 && DEBUG) | |
| int static count = 0; | |
| if (count < 20) { | |
| char label[_MAX_PATH]; | |
| int i; | |
| sprintf(label, "Frame%d-%d-", frame_index, count); | |
| DumpPGM(label, wavelet, NULL); | |
| for (i = 1; i < wavelet->num_bands; i++) | |
| { | |
| sprintf(label, "Frame-%d-band%d-%d-", frame_index, i, count); | |
| DumpBandPGM(label, wavelet, i, NULL); | |
| } | |
| } | |
| count++; | |
| #endif | |
| // Initialize the row pointers into the horizontal bands | |
| horizontal_lowlow[channel] = wavelet->band[LL_BAND]; | |
| horizontal_lowhigh[channel] = wavelet->band[LH_BAND]; | |
| horizontal_highlow[channel] = wavelet->band[HL_BAND]; | |
| horizontal_highhigh[channel] = wavelet->band[HH_BAND]; | |
| lowlow_quantization[channel] = wavelet->quantization[LL_BAND]; | |
| lowhigh_quantization[channel] = wavelet->quantization[LH_BAND]; | |
| highlow_quantization[channel] = wavelet->quantization[HL_BAND]; | |
| highhigh_quantization[channel] = wavelet->quantization[HH_BAND]; | |
| // Compute the pitch in units of pixels | |
| horizontal_pitch[channel] = wavelet->pitch/sizeof(PIXEL); | |
| // Compute the 8-bit pitch in units of pixels | |
| horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL); | |
| //horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL8S); | |
| // Remember the width of the horizontal wavelet rows for this channel | |
| horizontal_width[channel] = wavelet->width; | |
| //TODO: Need to recode the buffer allocations using the scratch space API | |
| // Divide the buffer into temporal lowpass and highpass rows | |
| temporal_lowpass[channel] = (PIXEL *)(buffer + (2 * channel) * temporal_row_size); | |
| temporal_highpass[channel] = (PIXEL *)(buffer + (2 * channel + 1) * temporal_row_size); | |
| } | |
| // Process one row at a time from each channel | |
| for (row = 0; row < half_height; row++) | |
| { | |
| PIXEL *line_buffer = (PIXEL *)(buffer + (2 * num_channels + 2) * temporal_row_size); | |
| // Invert the horizontal transform applied to the temporal bands in each channel | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| int pitch = horizontal_pitch[channel]; | |
| //int pitch8s = horizontal_pitch8s[channel]; | |
| // Invert the horizontal transform applied to the temporal lowpass row | |
| InvertHorizontalRow16s8sTo16sBuffered(horizontal_lowlow[channel], lowlow_quantization[channel], | |
| (PIXEL8S *)horizontal_lowhigh[channel], lowhigh_quantization[channel], | |
| temporal_lowpass[channel], | |
| horizontal_width[channel], | |
| (PIXEL *)line_buffer); | |
| // Invert the horizontal transform applied to the temporal highpass row | |
| //DAN20051004 -- possible reversiblity issue | |
| //InvertHorizontalRow8sBuffered //----------------------- Maybe bad | |
| InvertHorizontalRow16s8sTo16sBuffered(horizontal_highlow[channel], highlow_quantization[channel], | |
| (PIXEL8S *)horizontal_highhigh[channel], highhigh_quantization[channel], | |
| temporal_highpass[channel], | |
| horizontal_width[channel], | |
| (PIXEL *)line_buffer); | |
| // Advance to the next row in each horizontal band in this channel | |
| horizontal_lowlow[channel] += pitch; | |
| horizontal_lowhigh[channel] += pitch; | |
| horizontal_highlow[channel] += pitch; | |
| horizontal_highhigh[channel] += pitch; | |
| } | |
| // The output width is twice the width of the wavelet bands | |
| output_width = 2 * horizontal_width[0]; | |
| // Adjust the frame width to fill to the end of each row | |
| //frame_width = output_pitch / 2; | |
| if (precision == CODEC_PRECISION_10BIT) | |
| { | |
| // Invert the temporal bands from all channels and pack output pixels | |
| switch (frame->format) | |
| { | |
| // Need to reduce the resolution from 10 bits to 8 bits during the inverse | |
| case DECODED_FORMAT_YUYV: | |
| InvertInterlacedRow16s10bitToYUV(temporal_lowpass, temporal_highpass, num_channels, | |
| output, output_pitch, output_width, frame_width, | |
| chroma_offset); | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| InvertInterlacedRow16s10bitToUYVY(temporal_lowpass, temporal_highpass, num_channels, | |
| output, output_pitch, output_width, frame_width, | |
| chroma_offset); | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| } | |
| else // Older code for 8-bit precision | |
| { | |
| int format; | |
| assert(precision == CODEC_PRECISION_8BIT); | |
| switch (frame->format) | |
| { | |
| case DECODED_FORMAT_YUYV: | |
| format = COLOR_FORMAT_YUYV; | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| format = COLOR_FORMAT_UYVY; | |
| break; | |
| } | |
| // Invert the temporal bands from all channels and pack output pixels | |
| InvertInterlacedRow16sToYUV(temporal_lowpass, temporal_highpass, num_channels, | |
| output, output_pitch, output_width, frame_width, | |
| chroma_offset, format); | |
| } | |
| // Advance to the next row in the packed output image | |
| output += field_pitch; | |
| } | |
| } | |
| #endif | |
| #if _INTERLACED_WORKER_THREADS | |
| void TransformInverseFrameSectionToYUV(DECODER *decoder, int thread_index, int frame_index, int num_channels, | |
| uint8_t *output, int output_pitch, FRAME_INFO *frame, | |
| int chroma_offset, int precision) | |
| { | |
| FILE *logfile = decoder->logfile; | |
| TRANSFORM **transform = decoder->transform; | |
| const SCRATCH *scratch = &decoder->scratch; | |
| // Pointers to the rows in the horizontal wavelet for each channel | |
| PIXEL *horizontal_lowlow[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_lowhigh[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_highlow[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *horizontal_highhigh[TRANSFORM_MAX_CHANNELS]; | |
| // Horizontal wavelet band width and pitch | |
| int horizontal_width[TRANSFORM_MAX_CHANNELS]; | |
| int horizontal_pitch[TRANSFORM_MAX_CHANNELS]; | |
| int horizontal_pitch8s[TRANSFORM_MAX_CHANNELS]; | |
| // Quantization factors | |
| int lowlow_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int lowhigh_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int highlow_quantization[TRANSFORM_MAX_CHANNELS]; | |
| int highhigh_quantization[TRANSFORM_MAX_CHANNELS]; | |
| // Pointers to the rows in the temporal wavelet for each channel | |
| PIXEL *temporal_lowpass[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL *temporal_highpass[TRANSFORM_MAX_CHANNELS]; | |
| // Push the scratch space state to allocate a new section | |
| char *buffer = scratch->free_ptr; | |
| size_t buffer_size = scratch->free_size; | |
| uint8_t *output_row_ptr = output; | |
| // Dimensions of the reconstructed frame | |
| int frame_width = frame->width; | |
| int frame_height = frame->height; | |
| int half_height = frame_height / 2; | |
| size_t temporal_row_size = frame_width * sizeof(PIXEL); | |
| int field_pitch = 2 * output_pitch; | |
| int output_width; | |
| int channel; | |
| int row; | |
| HANDLE row_semaphore = decoder->interlaced_worker.row_semaphore; | |
| int return_value; | |
| // Round up the temporal row size to an integral number of cache lines | |
| temporal_row_size = ALIGN(temporal_row_size, _CACHE_LINE_SIZE); | |
| // Divide the buffer space between the four threads | |
| buffer_size /= 4; | |
| buffer += buffer_size * thread_index; | |
| // Check that the buffer starts on a cache line boundary | |
| assert(ISALIGNED(buffer, _CACHE_LINE_SIZE)); | |
| // Check that the number of channels is reasonable | |
| assert(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS); | |
| // Check that the buffer is large enough | |
| assert((2 * num_channels * temporal_row_size) <= buffer_size); | |
| // Allocate buffers for a single row of lowpass and highpass temporal coefficients | |
| // and initialize the arrays of row pointers into the horizontal transform bands | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| IMAGE *wavelet = transform[channel]->wavelet[frame_index]; | |
| #if (0 && DEBUG) | |
| int static count = 0; | |
| if (count < 20) { | |
| char label[_MAX_PATH]; | |
| int i; | |
| sprintf(label, "Frame%d-%d-", frame_index, count); | |
| DumpPGM(label, wavelet, NULL); | |
| for (i = 1; i < wavelet->num_bands; i++) | |
| { | |
| sprintf(label, "Frame-%d-band%d-%d-", frame_index, i, count); | |
| DumpBandPGM(label, wavelet, i, NULL); | |
| } | |
| } | |
| count++; | |
| #endif | |
| // Initialize the row pointers into the horizontal bands | |
| horizontal_lowlow[channel] = wavelet->band[LL_BAND]; | |
| horizontal_lowhigh[channel] = wavelet->band[LH_BAND]; | |
| horizontal_highlow[channel] = wavelet->band[HL_BAND]; | |
| horizontal_highhigh[channel] = wavelet->band[HH_BAND]; | |
| lowlow_quantization[channel] = wavelet->quantization[LL_BAND]; | |
| lowhigh_quantization[channel] = wavelet->quantization[LH_BAND]; | |
| highlow_quantization[channel] = wavelet->quantization[HL_BAND]; | |
| highhigh_quantization[channel] = wavelet->quantization[HH_BAND]; | |
| // Compute the pitch in units of pixels | |
| horizontal_pitch[channel] = wavelet->pitch/sizeof(PIXEL); | |
| // Compute the 8-bit pitch in units of pixels | |
| horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL); | |
| //horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL8S); | |
| // Remember the width of the horizontal wavelet rows for this channel | |
| horizontal_width[channel] = wavelet->width; | |
| //TODO: Need to recode the buffer allocations using the scratch space API | |
| // Divide the buffer into temporal lowpass and highpass rows | |
| temporal_lowpass[channel] = (PIXEL *)(buffer + (2 * channel) * temporal_row_size); | |
| temporal_highpass[channel] = (PIXEL *)(buffer + (2 * channel + 1) * temporal_row_size); | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Output buffer: %d (0x%p)\n", output, output); | |
| } | |
| #endif | |
| /* if (thread_index == 0) | |
| { | |
| row = 0; | |
| row_step = 1; | |
| } | |
| else if (thread_index == 1) | |
| { | |
| row = half_height - 1; | |
| row_step = -1; | |
| // Move to the bottom of the transform and process moving up | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| int offset = horizontal_pitch[channel] * (half_height - 1); | |
| horizontal_lowlow[channel] += offset; | |
| horizontal_lowhigh[channel] += offset; | |
| horizontal_highlow[channel] += offset; | |
| horizontal_highhigh[channel] += offset; | |
| horizontal_pitch[channel] = NEG(horizontal_pitch[channel]); | |
| horizontal_pitch8s[channel] = NEG(horizontal_pitch8s[channel]); | |
| } | |
| output += field_pitch * (half_height - 1); | |
| field_pitch = NEG(field_pitch); | |
| } | |
| else | |
| { | |
| assert(0); // what about middle threads? | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Thread index: %d, start row: %d, row step: %d, field_pitch: %d\n", | |
| thread_index, row, row_step, field_pitch); | |
| } | |
| #endif | |
| */ | |
| // Loop until all of the rows have been processed | |
| for (;;) | |
| { | |
| // Wait for one row from each channel to invert the transform | |
| return_value = WaitForSingleObject(row_semaphore, 0); | |
| // Determine the index of this worker thread | |
| if (return_value == WAIT_OBJECT_0) | |
| { | |
| if(decoder->interlaced_worker.lock_init) | |
| { | |
| EnterCriticalSection(&decoder->interlaced_worker.lock); | |
| } | |
| row = decoder->interlaced_worker.current_row++; | |
| if(decoder->interlaced_worker.lock_init) | |
| LeaveCriticalSection(&decoder->interlaced_worker.lock); | |
| output_row_ptr = output; | |
| output_row_ptr += row * 2 * output_pitch; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| int pitch = horizontal_pitch[channel]; | |
| IMAGE *wavelet = transform[channel]->wavelet[frame_index]; | |
| horizontal_lowlow[channel] = wavelet->band[LL_BAND]; | |
| horizontal_lowhigh[channel] = wavelet->band[LH_BAND]; | |
| horizontal_highlow[channel] = wavelet->band[HL_BAND]; | |
| horizontal_highhigh[channel] = wavelet->band[HH_BAND]; | |
| horizontal_lowlow[channel] += pitch*row; | |
| horizontal_lowhigh[channel] += pitch*row; | |
| horizontal_highlow[channel] += pitch*row; | |
| horizontal_highhigh[channel] += pitch*row; | |
| } | |
| } | |
| if (return_value == WAIT_OBJECT_0 && 0 <= row && row < half_height) | |
| { | |
| //PIXEL *line_buffer = (PIXEL *)(buffer + (2 * num_channels + 2) * temporal_row_size); | |
| PIXEL *line_buffer = (PIXEL *)(buffer + 2 * num_channels * temporal_row_size); | |
| // assert(0 <= row && row < half_height); | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Processing row: %d, thread index: %d, output: %d (0x%p)\n", | |
| row, thread_index, output_row_ptr); | |
| } | |
| #endif | |
| // Invert the horizontal transform applied to the temporal bands in each channel | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| int pitch = horizontal_pitch[channel]; | |
| //int pitch8s = horizontal_pitch8s[channel]; | |
| #if (0 && DEBUG) | |
| // Invert the horizontal transform by duplicating the lowpass pixels | |
| InvertHorizontalRowDuplicated16s(horizontal_lowlow[channel], lowlow_quantization[channel], | |
| (PIXEL8S *)horizontal_lowhigh[channel], lowhigh_quantization[channel], | |
| temporal_lowpass[channel], horizontal_width[channel], | |
| (PIXEL *)line_buffer); | |
| #else | |
| // Invert the horizontal transform applied to the temporal lowpass row | |
| InvertHorizontalRow16s8sTo16sBuffered(horizontal_lowlow[channel], lowlow_quantization[channel], | |
| (PIXEL8S *)horizontal_lowhigh[channel], lowhigh_quantization[channel], | |
| temporal_lowpass[channel], | |
| horizontal_width[channel], | |
| (PIXEL *)line_buffer); | |
| #endif | |
| // Invert the horizontal transform applied to the temporal highpass row | |
| InvertHorizontalRow8sBuffered((PIXEL8S *)horizontal_highlow[channel], highlow_quantization[channel], | |
| (PIXEL8S *)horizontal_highhigh[channel], highhigh_quantization[channel], | |
| temporal_highpass[channel], | |
| horizontal_width[channel], | |
| (PIXEL *)line_buffer); | |
| // Advance to the next row in each horizontal band in this channel | |
| //horizontal_lowlow[channel] += pitch; | |
| //horizontal_lowhigh[channel] += pitch; | |
| //horizontal_highlow[channel] += pitch; | |
| //horizontal_highhigh[channel] += pitch; | |
| } | |
| // The output width is twice the width of the wavelet bands | |
| output_width = 2 * horizontal_width[0]; | |
| // Adjust the frame width to fill to the end of each row | |
| //frame_width = output_pitch / 2; | |
| if (precision == CODEC_PRECISION_10BIT) | |
| { | |
| // Invert the temporal bands from all channels and pack output pixels | |
| switch (frame->format) | |
| { | |
| // Need to reduce the resolution from 10 bits to 8 bits during the inverse | |
| case DECODED_FORMAT_YUYV: | |
| InvertInterlacedRow16s10bitToYUV(temporal_lowpass, temporal_highpass, num_channels, | |
| output_row_ptr, output_pitch, output_width, frame_width, | |
| chroma_offset); | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| InvertInterlacedRow16s10bitToUYVY(temporal_lowpass, temporal_highpass, num_channels, | |
| output_row_ptr, output_pitch, output_width, frame_width, | |
| chroma_offset); | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| } | |
| else // Older code for 8-bit precision | |
| { | |
| int format; | |
| assert(precision == CODEC_PRECISION_8BIT); | |
| switch (frame->format) | |
| { | |
| case DECODED_FORMAT_YUYV: | |
| format = COLOR_FORMAT_YUYV; | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| format = COLOR_FORMAT_UYVY; | |
| break; | |
| } | |
| // Invert the temporal bands from all channels and pack output pixels | |
| InvertInterlacedRow16sToYUV(temporal_lowpass, temporal_highpass, num_channels, | |
| output_row_ptr, output_pitch, output_width, frame_width, | |
| chroma_offset, format); | |
| } | |
| // Advance to the next row in the input transforms | |
| //row += row_step; | |
| // Advance to the next row in the packed output image | |
| //output += field_pitch; | |
| } | |
| else | |
| { | |
| // No more rows to process | |
| break; | |
| } | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Finished transform, thread index: %d\n", thread_index); | |
| } | |
| #endif | |
| } | |
| #endif | |
| //#if BUILD_PROSPECT | |
| // Apply the inverse horizontal-temporal transform and output rows of luma and chroma | |
#if 0
// Disabled signature: the caller supplied an explicit buffer and size
void TransformInverseFrameToRow16u(TRANSFORM *transform[], int frame_index, int num_channels,
								   PIXEL16U *output, int output_pitch, FRAME_INFO *frame,
								   char *buffer, size_t buffer_size, int chroma_offset,
								   int precision)
#else
// Active signature: intermediate rows come from the decoder scratch space
void TransformInverseFrameToRow16u(DECODER *decoder, TRANSFORM *transform[], int frame_index, int num_channels,
								   PIXEL16U *output, int output_pitch, FRAME_INFO *frame,
								   const SCRATCH *scratch, int chroma_offset,
								   int precision)
#endif
{
	// Pointers to the rows in the horizontal wavelet for each channel
	PIXEL *horizontal_lowlow[TRANSFORM_MAX_CHANNELS];
	PIXEL *horizontal_lowhigh[TRANSFORM_MAX_CHANNELS];
	PIXEL *horizontal_highlow[TRANSFORM_MAX_CHANNELS];
	PIXEL *horizontal_highhigh[TRANSFORM_MAX_CHANNELS];
	// Horizontal wavelet band width and pitch
	int horizontal_width[TRANSFORM_MAX_CHANNELS];
	int horizontal_pitch[TRANSFORM_MAX_CHANNELS];
	// Quantization factors
	int lowlow_quantization[TRANSFORM_MAX_CHANNELS];
	int lowhigh_quantization[TRANSFORM_MAX_CHANNELS];
	int highlow_quantization[TRANSFORM_MAX_CHANNELS];
	int highhigh_quantization[TRANSFORM_MAX_CHANNELS];
	// Push the scratch space state to allocate a new section
	char *buffer = scratch->free_ptr;
#if DEBUG
	// Only needed for the buffer-size assertion below
	size_t buffer_size = scratch->free_size;
#endif
	// Buffers for the rows in the temporal wavelet (reused for each channel)
	PIXEL *temporal_lowpass;
	PIXEL *temporal_highpass;
	// Output row width per channel: full width for luma, half width for chroma
	int output_row_width[TRANSFORM_MAX_CHANNELS];
	// Dimensions of the reconstructed frame
	int frame_width = frame->width;
	int frame_height = frame->height;
	int half_height = frame_height / 2;
	size_t temporal_row_size = frame_width * sizeof(PIXEL);
	// Two interlaced output rows are produced per transform row
	int field_pitch = 2 * output_pitch;
	int luma_width = frame_width;
	int chroma_width = luma_width/2;
	int channel;
	int row;
#if (1 && DEBUG_ROW16U)
	// Debug-only staging buffer for the planar 16-bit output rows
	PIXEL16U *output_buffer;
#endif
	// This routine should only be called to decode rows of 16-bit luma and chroma
	//assert(frame->format == DECODED_FORMAT_YR16);
	// Round up the temporal row size to an integral number of cache lines
	temporal_row_size = ALIGN(temporal_row_size, _CACHE_LINE_SIZE);
	// Check that the buffer starts on a cache line boundary
	assert(ISALIGNED(buffer, _CACHE_LINE_SIZE));
	// Check that the number of channels is reasonable
	assert(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS);
	// Buffer must be large enough for two rows of temporal coefficients (lowpass and highpass)
	// plus the buffer used by the inverse horizontal transform for its intermediate results
#if DEBUG
	assert((2 * temporal_row_size) <= buffer_size);
#endif
	// Allocate buffers for one row of lowpass and highpass temporal coefficients
	temporal_lowpass = (PIXEL *)&buffer[0];
	temporal_highpass = (PIXEL *)&buffer[temporal_row_size];
#if (1 && DEBUG_ROW16U)
	// Debug staging buffer follows the two temporal coefficient rows
	output_buffer = (PIXEL16U *)&buffer[2 * temporal_row_size];
#endif
	// Initialize the arrays of row pointers into the horizontal transform bands
	for (channel = 0; channel < num_channels; channel++)
	{
		IMAGE *wavelet = transform[channel]->wavelet[frame_index];
#if (0 && DEBUG)
		// Disabled: dump the first twenty wavelets and their bands as PGM images
		int static count = 0;
		if (count < 20) {
			char label[_MAX_PATH];
			int i;
			sprintf(label, "Frame%d-%d-", frame_index, count);
			DumpPGM(label, wavelet, NULL);
			for (i = 1; i < wavelet->num_bands; i++)
			{
				sprintf(label, "Frame-%d-band%d-%d-", frame_index, i, count);
				DumpBandPGM(label, wavelet, i, NULL);
			}
		}
		count++;
#endif
		// Initialize the row pointers into the horizontal bands
		horizontal_lowlow[channel] = wavelet->band[LL_BAND];
		horizontal_lowhigh[channel] = wavelet->band[LH_BAND];
		horizontal_highlow[channel] = wavelet->band[HL_BAND];
		horizontal_highhigh[channel] = wavelet->band[HH_BAND];
		lowlow_quantization[channel] = wavelet->quantization[LL_BAND];
		lowhigh_quantization[channel] = wavelet->quantization[LH_BAND];
		highlow_quantization[channel] = wavelet->quantization[HL_BAND];
		highhigh_quantization[channel] = wavelet->quantization[HH_BAND];
		// Compute the pitch in units of pixels
		horizontal_pitch[channel] = wavelet->pitch/sizeof(PIXEL);
		// Remember the width of the horizontal wavelet rows for this channel
		horizontal_width[channel] = wavelet->width;
		// Compute the width of each row of output pixels
		// (channel zero is luma at full width; chroma channels are half width)
		output_row_width[channel] = (channel == 0) ? luma_width : chroma_width;
	}
	// Process one row at a time from each channel
	for (row = 0; row < half_height; row++)
	{
#if (1 && DEBUG_ROW16U)
		// Debug path: write planar rows into the staging buffer first
		PIXEL16U *output_row_ptr = output_buffer;
		PIXEL16U *planar_output[TRANSFORM_MAX_CHANNELS];
		int planar_pitch[TRANSFORM_MAX_CHANNELS];
		ROI strip = {luma_width, 2};
		uint8_t *yuv_output = (uint8_t *)output;
		uint8_t *output1 = yuv_output;
		uint8_t *output2 = yuv_output + output_pitch;
#else
		PIXEL16U *output_row_ptr = output;
#endif
		// Invert the horizontal transform applied to the temporal bands in each channel
		for (channel = 0; channel < num_channels; channel++)
		{
			int pitch = horizontal_pitch[channel];
			if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL)
			{
				// Half-horizontal decode: pass the band rows through without
				// reconstructing the full-width rows
				BypassHorizontalRow16s(horizontal_lowlow[channel], horizontal_lowhigh[channel],
									   temporal_lowpass, horizontal_width[channel]);
				// Invert the horizontal transform applied to the temporal highpass row
				BypassHorizontalRow16s(horizontal_highlow[channel], horizontal_highhigh[channel],
									   temporal_highpass, horizontal_width[channel]);
			}
			else
			{
				// Invert the horizontal transform applied to the temporal lowpass row
				InvertHorizontalRow16s(horizontal_lowlow[channel], horizontal_lowhigh[channel],
									   temporal_lowpass, horizontal_width[channel]);
				// Invert the horizontal transform applied to the temporal highpass row
				InvertHorizontalRow16s(horizontal_highlow[channel], horizontal_highhigh[channel],
									   temporal_highpass, horizontal_width[channel]);
			}
			//***DEBUG***
			//ZeroMemory(temporal_highpass, temporal_row_size);
			//FillPixelMemory(temporal_highpass, temporal_row_size/sizeof(PIXEL), 50);
			// Advance to the next row in each horizontal band in this channel
			horizontal_lowlow[channel] += pitch;
			horizontal_lowhigh[channel] += pitch;
			horizontal_highlow[channel] += pitch;
			horizontal_highhigh[channel] += pitch;
#if (1 && DEBUG_ROW16U)
			// Write the rows of 16-bit pixels to a temporary buffer
			planar_output[channel] = output_row_ptr;
			// NOTE(review): pitch computed in bytes via sizeof(PIXEL) scaling
			// of output_pitch -- debug-only path, verify units if re-enabled
			planar_pitch[channel] = output_pitch * sizeof(PIXEL);
			// Invert the temporal transform and output two rows of luma or chroma
			InvertInterlacedRow16sToRow16u(temporal_lowpass, temporal_highpass,
										   planar_output[channel], planar_pitch[channel],
										   output_row_width[channel],
										   frame_width, chroma_offset, precision);
			//if (channel > 0)
			if (0)
			{
				// Disabled: overwrite the channel output with a constant fill
				uint8_t *output3 = (uint8_t *)planar_output[channel];
				uint8_t *output4 = (uint8_t *)output3 + planar_pitch[channel];
				int output_size = output_row_width[channel] * sizeof(PIXEL);
				int fill_value = (128 << 8);
				//ZeroMemory(output3, output_size);
				//ZeroMemory(output4, output_size);
				FillPixelMemory((PIXEL *)output3, output_row_width[channel], fill_value);
				FillPixelMemory((PIXEL *)output4, output_row_width[channel], fill_value);
			}
#else
			// Invert the temporal transform and output two rows of luma or chroma
			InvertInterlacedRow16sToRow16u(temporal_lowpass, temporal_highpass,
										   output_row_ptr, output_pitch, output_row_width[channel],
										   frame_width, chroma_offset, precision);
#endif
			// Advance the output row pointer to the next channel
			output_row_ptr += output_row_width[channel];
			// Check the output row alignment
			assert(ISALIGNED16(output_row_ptr));
		}
		// Advance to the next group of rows in the output image
		output += field_pitch/sizeof(PIXEL16U);
	}
}
| //#endif | |
| #if _INTERLACED_WORKER_THREADS | |
/*!
 * @brief Worker-thread section of the inverse frame transform that produces
 * rows of 16-bit unsigned luma/chroma output.
 *
 * Each worker thread repeatedly claims a transform row index from the shared
 * counter in decoder->interlaced_worker (guarded by its critical section),
 * inverts the horizontal transform for each channel at that row, then inverts
 * the temporal (interlaced) transform to emit two output rows per channel.
 * The loop exits when the row semaphore has no more rows or the claimed row
 * index falls outside [0, half_height).
 *
 * @param decoder       Decoder state; supplies transforms, scratch space, and
 *                      the interlaced worker synchronization objects.
 * @param thread_index  Index of this worker thread (used to partition scratch).
 * @param frame_index   Index of the frame wavelet to invert.
 * @param num_channels  Number of channels (at most TRANSFORM_MAX_CHANNELS).
 * @param output        Base of the 16-bit output frame buffer.
 * @param output_pitch  Pitch between output rows.
 * @param frame         Dimensions (and format) of the reconstructed frame.
 * @param chroma_offset Chroma offset passed through to the row inversion.
 * @param precision     Coefficient precision passed through to the row inversion.
 */
void TransformInverseFrameSectionToRow16u(DECODER *decoder, int thread_index, int frame_index, int num_channels,
                                          PIXEL16U *output, int output_pitch, FRAME_INFO *frame,
                                          int chroma_offset, int precision)
{
    FILE *logfile = decoder->logfile;
    TRANSFORM **transform = decoder->transform;
    const SCRATCH *scratch = &decoder->scratch;

    // Pointers to the rows in the horizontal wavelet for each channel
    PIXEL *horizontal_lowlow[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_lowhigh[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_highlow[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_highhigh[TRANSFORM_MAX_CHANNELS];

    // Horizontal wavelet band width and pitch
    int horizontal_width[TRANSFORM_MAX_CHANNELS];
    int horizontal_pitch[TRANSFORM_MAX_CHANNELS];

    // Quantization factors
    // NOTE(review): these four arrays are filled from the wavelet below but are
    // never read in this routine (the Invert/Bypass row calls do not take them).
    int lowlow_quantization[TRANSFORM_MAX_CHANNELS];
    int lowhigh_quantization[TRANSFORM_MAX_CHANNELS];
    int highlow_quantization[TRANSFORM_MAX_CHANNELS];
    int highhigh_quantization[TRANSFORM_MAX_CHANNELS];

    // Push the scratch space state to allocate a new section
    char *buffer = scratch->free_ptr;
    size_t buffer_size = scratch->free_size;

    // Buffers for the rows in the temporal wavelet (reused for each channel)
    PIXEL *temporal_lowpass;
    PIXEL *temporal_highpass;

    int output_row_width[TRANSFORM_MAX_CHANNELS];

    // Dimensions of the reconstructed frame
    int frame_width = frame->width;
    int frame_height = frame->height;
    int half_height = frame_height / 2;

    size_t temporal_row_size = frame_width * sizeof(PIXEL);

    // Two output rows are emitted per transform row (interlaced fields)
    int field_pitch = 2 * output_pitch;

    int luma_width = frame_width;
    int chroma_width = luma_width/2;        // 4:2:2 chroma is half the luma width

    int channel;
    int row;

    HANDLE row_semaphore = decoder->interlaced_worker.row_semaphore;
    int return_value;

#if (1 && DEBUG_ROW16U)
    PIXEL16U *output_buffer;
#endif

    // This routine should only be called to decode rows of 16-bit luma and chroma
    //assert(frame->format == DECODED_FORMAT_YR16);

    // Round up the temporal row size to an integral number of cache lines
    temporal_row_size = ALIGN(temporal_row_size, _CACHE_LINE_SIZE);

#if 0
    if (thread_index == 1)
    {
        // Skip over the buffer space used by the other thread
        size_t buffer_usage = 2 * temporal_row_size;
        buffer += buffer_usage;
        buffer_size -= buffer_usage;
    }
#else
    // Divide the buffer space between the two threads
    // NOTE(review): the divisor is four although the comment says two threads —
    // presumably this reserves headroom or supports up to four workers; confirm.
    buffer_size /= 4;
    buffer += buffer_size * thread_index;
#endif

    // Check that the buffer starts on a cache line boundary
    assert(ISALIGNED(buffer, _CACHE_LINE_SIZE));

    // Check that the number of channels is reasonable
    assert(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS);

    // Buffer must be large enough for two rows of temporal coefficients (lowpass and highpass)
    // plus the buffer used by the inverse horizontal transform for its intermediate results
    assert((2 * temporal_row_size) <= buffer_size);

    // Allocate buffers for one row of lowpass and highpass temporal coefficients
    temporal_lowpass = (PIXEL *)&buffer[0];
    temporal_highpass = (PIXEL *)&buffer[temporal_row_size];

#if (1 && DEBUG_ROW16U)
    output_buffer = (PIXEL16U *)&buffer[2 * temporal_row_size];
#endif

    // Initialize the arrays of row pointers into the horizontal transform bands
    for (channel = 0; channel < num_channels; channel++)
    {
        IMAGE *wavelet = transform[channel]->wavelet[frame_index];
#if (0 && DEBUG)
        int static count = 0;
        if (count < 20) {
            char label[_MAX_PATH];
            int i;
            sprintf(label, "Frame%d-%d-", frame_index, count);
            DumpPGM(label, wavelet, NULL);
            for (i = 1; i < wavelet->num_bands; i++)
            {
                sprintf(label, "Frame-%d-band%d-%d-", frame_index, i, count);
                DumpBandPGM(label, wavelet, i, NULL);
            }
        }
        count++;
#endif
        // Initialize the row pointers into the horizontal bands
        horizontal_lowlow[channel] = wavelet->band[LL_BAND];
        horizontal_lowhigh[channel] = wavelet->band[LH_BAND];
        horizontal_highlow[channel] = wavelet->band[HL_BAND];
        horizontal_highhigh[channel] = wavelet->band[HH_BAND];

        lowlow_quantization[channel] = wavelet->quantization[LL_BAND];
        lowhigh_quantization[channel] = wavelet->quantization[LH_BAND];
        highlow_quantization[channel] = wavelet->quantization[HL_BAND];
        highhigh_quantization[channel] = wavelet->quantization[HH_BAND];

        // Compute the pitch in units of pixels
        horizontal_pitch[channel] = wavelet->pitch/sizeof(PIXEL);

        // Remember the width of the horizontal wavelet rows for this channel
        horizontal_width[channel] = wavelet->width;

        // Compute the width of each row of output pixels
        output_row_width[channel] = (channel == 0) ? luma_width : chroma_width;
    }

#if (0 && DEBUG)
    if (logfile) {
        fprintf(logfile, "Output buffer: %d (0x%p)\n", output, output);
    }
#endif

    /* if (thread_index == 0)
    {
        row = 0;
        row_step = 1;
    }
    else if (thread_index == 1)
    {
        row = half_height - 1;
        row_step = -1;

        // Move to the bottom of the transform and process moving up
        for (channel = 0; channel < num_channels; channel++)
        {
            int offset = horizontal_pitch[channel] * (half_height - 1);
            horizontal_lowlow[channel] += offset;
            horizontal_lowhigh[channel] += offset;
            horizontal_highlow[channel] += offset;
            horizontal_highhigh[channel] += offset;
            horizontal_pitch[channel] = NEG(horizontal_pitch[channel]);
            //horizontal_pitch8s[channel] = NEG(horizontal_pitch8s[channel]);
        }

        //output += field_pitch * (half_height - 1);
        output += (frame_height - 1) * output_pitch/sizeof(PIXEL16U);
        output_pitch = NEG(output_pitch);
        field_pitch = NEG(field_pitch);
    }
    else
    {
        assert(0);  // middle threads
    }
    */

    // NOTE(review): the disabled block below references row_step, which is only
    // declared in the commented-out code above; it would not compile if enabled.
#if (0 && DEBUG)
    if (logfile) {
        fprintf(logfile, "Thread index: %d, start row: %d, row step: %d, field_pitch: %d\n",
            thread_index, row, row_step, field_pitch);
    }
#endif

    // Loop until all of the rows have been processed
    for (;;)
    {
        PIXEL16U *output_row_ptr;

        // Wait for one row from each channel to invert the transform.
        // NOTE(review): the zero timeout polls the semaphore — if no row is
        // immediately available the thread exits the loop; confirm that the
        // producer posts all rows before the workers start.
        return_value = WaitForSingleObject(row_semaphore, 0);

        // Determine the index of this worker thread
        if (return_value == WAIT_OBJECT_0)
        {
            // Claim the next row index under the shared lock (if initialized)
            if(decoder->interlaced_worker.lock_init)
            {
                EnterCriticalSection(&decoder->interlaced_worker.lock);
            }
            row = decoder->interlaced_worker.current_row++;
            if(decoder->interlaced_worker.lock_init)
                LeaveCriticalSection(&decoder->interlaced_worker.lock);

            output_row_ptr = output;
            // NOTE(review): this advances a PIXEL16U pointer by row * output_pitch
            // without dividing by sizeof(PIXEL16U), and without the factor of two
            // used by field_pitch; the disabled code above uses
            // output_pitch/sizeof(PIXEL16U). Confirm whether output_pitch is in
            // bytes or pixels on this code path.
            output_row_ptr += row * output_pitch;

            // Rebase the band row pointers for the claimed row in every channel
            for (channel = 0; channel < num_channels; channel++)
            {
                int pitch = horizontal_pitch[channel];
                IMAGE *wavelet = transform[channel]->wavelet[frame_index];
                horizontal_lowlow[channel] = wavelet->band[LL_BAND];
                horizontal_lowhigh[channel] = wavelet->band[LH_BAND];
                horizontal_highlow[channel] = wavelet->band[HL_BAND];
                horizontal_highhigh[channel] = wavelet->band[HH_BAND];

                horizontal_lowlow[channel] += pitch*row;
                horizontal_lowhigh[channel] += pitch*row;
                horizontal_highlow[channel] += pitch*row;
                horizontal_highhigh[channel] += pitch*row;
            }
        }

        // Process the claimed row if it is inside the frame; otherwise exit
        // (note: row is only read after return_value == WAIT_OBJECT_0, so it is
        // never read uninitialized thanks to && short-circuiting)
        if (return_value == WAIT_OBJECT_0 && 0 <= row && row < half_height)
        {
            assert(0 <= row && row < half_height);

            if(decoder->frame.resolution == DECODED_RESOLUTION_FULL)
            {
                // Invert the horizontal transform applied to the temporal bands in each channel
                for (channel = 0; channel < num_channels; channel++)
                {
                    int pitch = horizontal_pitch[channel];

                    // Invert the horizontal transform applied to the temporal lowpass row
                    InvertHorizontalRow16s(horizontal_lowlow[channel], horizontal_lowhigh[channel],
                                           temporal_lowpass, horizontal_width[channel]);

                    // Invert the horizontal transform applied to the temporal highpass row
                    InvertHorizontalRow16s(horizontal_highlow[channel], horizontal_highhigh[channel],
                                           temporal_highpass, horizontal_width[channel]);

                    // Invert the temporal transform and output two rows of luma or chroma
                    InvertInterlacedRow16sToRow16u(temporal_lowpass, temporal_highpass,
                                                   output_row_ptr, output_pitch, output_row_width[channel],
                                                   frame_width, chroma_offset, precision);

                    // Advance the output row pointer to the next channel
                    output_row_ptr += output_row_width[channel];
                }
            }
            else if(decoder->frame.resolution == DECODED_RESOLUTION_HALF_HORIZONTAL)
            {
                // Invert the horizontal transform applied to the temporal bands in each channel
                for (channel = 0; channel < num_channels; channel++)
                {
                    int pitch = horizontal_pitch[channel];

                    // Half-horizontal decode: bypass (copy) instead of a full
                    // horizontal inverse for the temporal lowpass row
                    BypassHorizontalRow16s(horizontal_lowlow[channel], horizontal_lowhigh[channel],
                                           temporal_lowpass, horizontal_width[channel]);

                    // Bypass the horizontal inverse for the temporal highpass row
                    BypassHorizontalRow16s(horizontal_highlow[channel], horizontal_highhigh[channel],
                                           temporal_highpass, horizontal_width[channel]);

                    // Invert the temporal transform and output two rows of luma or chroma
                    InvertInterlacedRow16sToRow16u(temporal_lowpass, temporal_highpass,
                                                   output_row_ptr, output_pitch, output_row_width[channel],
                                                   frame_width, chroma_offset, precision);

                    // Advance the output row pointer to the next channel
                    output_row_ptr += output_row_width[channel];
                }
            }
        }
        else
        {
            // No more rows to process
            break;
        }
    }

#if (1 && DEBUG)
    if (logfile) {
        fprintf(logfile, "Finished transform, thread index: %d\n", thread_index);
    }
#endif
}
| #endif | |
| #if 0 | |
| DWORD WINAPI TransformInverseFrameToRow16utopThread(LPVOID param) | |
| { | |
| struct data | |
| { | |
| TRANSFORM *transform[3]; | |
| int frame_index; | |
| int num_channels; | |
| uint8_t *output; | |
| int output_pitch; | |
| FRAME_INFO *info; | |
| SCRATCH *scratch; | |
| int chroma_offset; | |
| int precision; | |
| } *dptr; | |
| dptr = (struct data *)param; | |
| TransformInverseFrameToRow16utop(dptr->transform, dptr->frame_index, dptr->num_channels, | |
| (PIXEL16U *)dptr->output, dptr->output_pitch, dptr->info, | |
| dptr->scratch, dptr->chroma_offset, dptr->precision); | |
| return 0; | |
| } | |
| DWORD WINAPI TransformInverseFrameToRow16ubottomThread(LPVOID param) | |
| { | |
| struct data | |
| { | |
| TRANSFORM *transform[3]; | |
| int frame_index; | |
| int num_channels; | |
| uint8_t *output; | |
| int output_pitch; | |
| FRAME_INFO *info; | |
| SCRATCH *scratch; | |
| int chroma_offset; | |
| int precision; | |
| } *dptr; | |
| dptr = (struct data *)param; | |
| TransformInverseFrameToRow16ubottom(dptr->transform, dptr->frame_index, dptr->num_channels, | |
| (PIXEL16U *)dptr->output, dptr->output_pitch, dptr->info, | |
| dptr->scratch, dptr->chroma_offset, dptr->precision); | |
| return 0; | |
| } | |
| #endif | |
| extern void fast_srand( int seed ); | |
| // Apply the inverse horizontal-temporal transform and pack the output into a buffer | |
/*!
 * @brief Apply the inverse horizontal-temporal transform to one frame wavelet
 * per channel and pack the result into the caller's output buffer.
 *
 * For each row of the frame wavelet: invert the horizontal transform for the
 * temporal lowpass and highpass bands of every channel, then invert the
 * temporal (interlaced) transform across all channels, packing two output rows
 * per iteration in the requested pixel format (V210/YU64 take a dedicated
 * packing path; all other formats go through InvertInterlacedRow16s).
 *
 * @param transform     Per-channel transforms holding the frame wavelets.
 * @param frame_index   Index of the frame wavelet to invert.
 * @param num_channels  Number of channels (at most TRANSFORM_MAX_CHANNELS).
 * @param output        Packed output frame buffer.
 * @param output_pitch  Output pitch in bytes (negated for inverted formats).
 * @param frame         Output frame dimensions, format, and colorspace.
 * @param scratch       Scratch allocator supplying working buffers.
 * @param chroma_offset Chroma offset passed through to the row inversion.
 * @param precision     Coefficient precision passed through to the row inversion.
 */
#if 0
void TransformInverseFrameToBuffer(TRANSFORM *transform[], int frame_index, int num_channels,
                                   uint8_t *output, int output_pitch, FRAME_INFO *frame,
                                   char *buffer, size_t buffer_size, int chroma_offset,
                                   int precision)
#else
void TransformInverseFrameToBuffer(TRANSFORM *transform[], int frame_index, int num_channels,
                                   uint8_t *output, int output_pitch, FRAME_INFO *frame,
                                   const SCRATCH *scratch, int chroma_offset, int precision)
#endif
{
    // Pointers to the rows in the horizontal wavelet for each channel
    PIXEL *horizontal_lowlow[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_lowhigh[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_highlow[TRANSFORM_MAX_CHANNELS];
    PIXEL *horizontal_highhigh[TRANSFORM_MAX_CHANNELS];

    // Horizontal wavelet band width and pitch
    int horizontal_width[TRANSFORM_MAX_CHANNELS];
    int horizontal_pitch[TRANSFORM_MAX_CHANNELS];
    //int horizontal_pitch8s[TRANSFORM_MAX_CHANNELS];

    // Quantization factors
    int lowlow_quantization[TRANSFORM_MAX_CHANNELS];
    int lowhigh_quantization[TRANSFORM_MAX_CHANNELS];
    int highlow_quantization[TRANSFORM_MAX_CHANNELS];
    int highhigh_quantization[TRANSFORM_MAX_CHANNELS];

    // Push the scratch space state to allocate a new section
    char *buffer = scratch->free_ptr;
    size_t buffer_size = scratch->free_size;

    // Pointers to the rows in the temporal wavelet for each channel
    PIXEL *temporal_lowpass[TRANSFORM_MAX_CHANNELS];
    PIXEL *temporal_highpass[TRANSFORM_MAX_CHANNELS];

    // Dimensions of the reconstructed frame
    int frame_width = frame->width;
    int frame_height = frame->height;
    int half_height = frame_height / 2;     // one wavelet row yields two output rows

    size_t temporal_row_size = frame_width * sizeof(PIXEL);
    size_t temporal_buffer_size = 2 * num_channels * temporal_row_size;
#if DEBUG
    size_t yuv_row_size = frame_width * 2;
#endif
    char *yuv_buffer;
    size_t yuv_buffer_size;

    int field_pitch = 2 * output_pitch;

    int format = frame->format;
    // RGB24/RGB32 output is written bottom-up (pitch is negated below)
    bool inverted = (format == DECODED_FORMAT_RGB24 || format == DECODED_FORMAT_RGB32);
    int output_width;
    int channel;
    int row;

    // Round up the temporal row size to an integral number of cache lines
    temporal_row_size = ALIGN(temporal_row_size, _CACHE_LINE_SIZE);

    // Check that the buffer starts on a cache line boundary
    assert(ISALIGNED(buffer, _CACHE_LINE_SIZE));

    // Check that the number of channels is reasonable
    assert(0 < num_channels && num_channels <= TRANSFORM_MAX_CHANNELS);

    // Check that the buffer is large enough
    // NOTE(review): this checks only the temporal rows; the aligned
    // temporal_row_size is used here while temporal_buffer_size (computed from
    // the unaligned size above) partitions the yuv region — confirm intended.
    assert((2 * num_channels * temporal_row_size) <= buffer_size);

    // Allocate buffers for a single row of lowpass and highpass temporal coefficients
    // and initialize the arrays of row pointers into the horizontal transform bands
    for (channel = 0; channel < num_channels; channel++)
    {
        IMAGE *wavelet = transform[channel]->wavelet[frame_index];

        // Initialize the row pointers into the horizontal bands
        horizontal_lowlow[channel] = wavelet->band[LL_BAND];
        horizontal_lowhigh[channel] = wavelet->band[LH_BAND];
        horizontal_highlow[channel] = wavelet->band[HL_BAND];
        horizontal_highhigh[channel] = wavelet->band[HH_BAND];

        lowlow_quantization[channel] = wavelet->quantization[LL_BAND];
        lowhigh_quantization[channel] = wavelet->quantization[LH_BAND];
        highlow_quantization[channel] = wavelet->quantization[HL_BAND];
        highhigh_quantization[channel] = wavelet->quantization[HH_BAND];

        // Compute the pitch in units of pixels
        horizontal_pitch[channel] = wavelet->pitch/sizeof(PIXEL);

        // Compute the 8-bit pitch in units of pixels
        //horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL);
        //horizontal_pitch8s[channel] = wavelet->pitch8s/sizeof(PIXEL8S);

        // Remember the width of the horizontal wavelet rows for this channel
        horizontal_width[channel] = wavelet->width;

        // Divide the buffer into temporal lowpass and highpass rows
        temporal_lowpass[channel] = (PIXEL *)(buffer + (2 * channel) * temporal_row_size);
        temporal_highpass[channel] = (PIXEL *)(buffer + (2 * channel + 1) * temporal_row_size);
    }

    // Allocate buffer space for the intermediate YUV data
    yuv_buffer = buffer + temporal_buffer_size;
    yuv_buffer_size = buffer_size - temporal_buffer_size;

#if DEBUG
    assert(yuv_buffer_size >= 2 * yuv_row_size);
#endif

    if (inverted)
    {
        // Start at the bottom output row and negate the pitches to walk upward
        output += (frame_height - 1) * output_pitch;
        output_pitch = (- output_pitch);
        field_pitch = (- field_pitch);
    }

    // Process one row at a time from each channel
    for (row = 0; row < half_height; row++)
    {
        // Intermediate line buffer used by the buffered horizontal inverses.
        // NOTE(review): this offset ((2*num_channels + 2) rows) lies inside the
        // region assigned to yuv_buffer above — presumably a deliberate reuse of
        // scratch space; confirm the packing routines do not clash with it.
        PIXEL *line_buffer = (PIXEL *)(buffer + (2 * num_channels + 2) * temporal_row_size);

        // Invert the horizontal transform applied to the temporal bands in each channel
        for (channel = 0; channel < num_channels; channel++)
        {
            int pitch = horizontal_pitch[channel];
            //int pitch8s = horizontal_pitch8s[channel];

            // Invert the horizontal transform applied to the temporal lowpass row
            InvertHorizontalRow16s8sTo16sBuffered(horizontal_lowlow[channel], lowlow_quantization[channel],
                                                  (PIXEL8S *)horizontal_lowhigh[channel], lowhigh_quantization[channel],
                                                  temporal_lowpass[channel],
                                                  horizontal_width[channel],
                                                  (PIXEL *)line_buffer);

            // Invert the horizontal transform applied to the temporal highpass row
            InvertHorizontalRow8sBuffered((PIXEL8S *)horizontal_highlow[channel], highlow_quantization[channel],
                                          (PIXEL8S *)horizontal_highhigh[channel], highhigh_quantization[channel],
                                          temporal_highpass[channel],
                                          horizontal_width[channel],
                                          (PIXEL *)line_buffer);

            // Advance to the next row in each horizontal band in this channel
            horizontal_lowlow[channel] += pitch;
            horizontal_lowhigh[channel] += pitch;
            horizontal_highlow[channel] += pitch;
            horizontal_highhigh[channel] += pitch;
        }

        // The output width is twice the width of the wavelet bands
        output_width = 2 * horizontal_width[0];

        // Adjust the frame width to fill to the end of each row
        //frame_width = output_pitch / 2;

//#if BUILD_PROSPECT
        if (format == DECODED_FORMAT_V210 || format == DECODED_FORMAT_YU64)
        {
            // Invert the temporal bands from all channels and pack as V210 output
            InvertInterlacedRow16sToV210(temporal_lowpass, temporal_highpass, num_channels,
                                         output, output_pitch, output_width, frame_width,
                                         yuv_buffer, yuv_buffer_size, format, chroma_offset, precision);
        }
        else
//#endif
        {
            // Invert the temporal bands from all channels and pack as 8-bit output
            InvertInterlacedRow16s(temporal_lowpass, temporal_highpass, num_channels,
                                   output, output_pitch, output_width, frame_width,
                                   yuv_buffer, yuv_buffer_size, format, frame->colorspace,
                                   chroma_offset, precision, row);
        }

        // Advance to the next row in the packed output image
        output += field_pitch;
    }
}
| void CopyImageToBuffer(IMAGE *image, uint8_t *output_buffer, int32_t output_pitch, int format) | |
| { | |
| bool inverted = false; | |
| size_t output_size; | |
| START(tk_convert); | |
| // Determine the type of conversion | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB24: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB24_INVERTED: | |
| ConvertImageToRGB(image, output_buffer, output_pitch, COLOR_FORMAT_RGB24, inverted); | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB32_INVERTED: | |
| ConvertImageToRGB(image, output_buffer, output_pitch, COLOR_FORMAT_RGB32, inverted); | |
| break; | |
| #if 0 | |
| case DECODED_FORMAT_YUYV_INVERTED: | |
| inverted = true; | |
| // Fall through and convert to YUV (first image row displayed at the bottom) | |
| #endif | |
| case DECODED_FORMAT_YUYV: | |
| ConvertImageToYUV(image, output_buffer, output_pitch, COLOR_FORMAT_YUYV, inverted); | |
| break; | |
| #if 0 | |
| case DECODED_FORMAT_UYVY_INVERTED: | |
| inverted = true; | |
| // Fall through and convert to YUV (first image row displayed at the bottom) | |
| #endif | |
| case DECODED_FORMAT_UYVY: | |
| ConvertImageToYUV(image, output_buffer, output_pitch, COLOR_FORMAT_UYVY, inverted); | |
| break; | |
| default: // Unsupported format (return a blank frame) | |
| assert(0); | |
| output_size = image->height * output_pitch; | |
| memset(output_buffer, COLOR_CHROMA_ZERO, output_size); | |
| break; | |
| } | |
| STOP(tk_convert); | |
| } | |
| void SideLowpass16s10bitToYUYV(IMAGE *images[], uint8_t *output_buffer, int output_width, int output_height, | |
| int output_pitch, bool inverted) | |
| { | |
| IMAGE *y_image = images[0]; | |
| IMAGE *u_image = images[1]; | |
| IMAGE *v_image = images[2]; | |
| int width = y_image->width; | |
| int height = output_height; | |
| PIXEL *y_row_ptr = y_image->band[0]; | |
| PIXEL *u_row_ptr = u_image->band[0]; | |
| PIXEL *v_row_ptr = v_image->band[0]; | |
| int y_pitch = y_image->pitch/sizeof(PIXEL); | |
| int u_pitch = u_image->pitch/sizeof(PIXEL); | |
| int v_pitch = v_image->pitch/sizeof(PIXEL); | |
| uint8_t *outrow = output_buffer; | |
| uint8_t *outptr; | |
| int row, column; | |
| // Definitions for optimization | |
| //const int column_step = 2 * sizeof(__m64); | |
| // Column at which post processing must begin | |
| //int post_column = width - (width % column_step); | |
| // The output pitch should be a positive number before inversion | |
| assert(output_pitch > 0); | |
| // Should the image be inverted? | |
| if (inverted) { | |
| outrow += (height - 1) * output_pitch; // Start at the bottom row | |
| output_pitch = NEG(output_pitch); // Negate the pitch to go up | |
| } | |
| for (row = 0; row < height; row++) | |
| { | |
| outptr = outrow; | |
| // Fill the rest of the output row | |
| for (column = 0; column < width; column+=4) | |
| { | |
| int chroma_column = column>>1; | |
| *(outptr++) = SATURATE_8U((y_row_ptr[column]+y_row_ptr[column+1])>>5); | |
| *(outptr++) = SATURATE_8U((v_row_ptr[chroma_column]+v_row_ptr[chroma_column+1])>>5); | |
| *(outptr++) = SATURATE_8U((y_row_ptr[column+2]+y_row_ptr[column+3])>>5); | |
| *(outptr++) = SATURATE_8U((u_row_ptr[chroma_column]+u_row_ptr[chroma_column+1])>>5); | |
| } | |
| // Advance to the next rows in the input and output images | |
| y_row_ptr += y_pitch;// 3D Work | |
| u_row_ptr += u_pitch; | |
| v_row_ptr += v_pitch; | |
| outrow += output_pitch; | |
| } | |
| } | |
// Convert 16-bit signed lowpass data into packed RGB/YUV and store it in the output buffer
/*!
 * @brief Dispatch the decoded lowpass planes to the converter matching the
 * requested output format, taking the encoded format (YUV 4:2:2, RGB 4:4:4,
 * RGBA 4:4:4:4, or Bayer) into account.
 *
 * @param decoder       Decoder state (3D channel blending fields are consulted
 *                      for the YUYV/UYVY path).
 * @param images        Decoded lowpass planes, one per channel.
 * @param num_channels  Number of planes in images[].
 * @param output_buffer Destination for the packed pixels.
 * @param output_pitch  Destination pitch in bytes.
 * @param info          Output frame dimensions, format, and colorspace.
 * @param chroma_offset Unused in this routine.
 * @param precision     Coefficient precision; selects the 10-bit paths and
 *                      sets the descale shift (precision - 8).
 * @param encode_format Encoded-format tag that selects the source layout.
 * @param whitebitdepth White-point bit depth forwarded to the RGB48 converters.
 */
void CopyLowpass16sToBuffer(DECODER *decoder, IMAGE *images[], int num_channels, uint8_t *output_buffer, int32_t output_pitch,
                            FRAME_INFO *info, int chroma_offset, int precision, int encode_format, int whitebitdepth)
{
    //IMAGE *image = frame->channel[0];
    bool inverted = false;
    int output_width = info->width;
    int output_height = info->height;
    int descale = precision - 8;    // shift from coded precision down to 8 bits

    // Get the color format from the decoded format
    int color_format = info->format & COLOR_FORMAT_MASK;

    // Must compile this routine with switches set for decoding to 8-bit unsigned pixels
    // (when the switch is not set the routine is a stub that asserts and returns)
#if !defined(_DECODE_FRAME_8U) || (_DECODE_FRAME_8U == 0)
    assert(0);
    return;
#endif

    START(tk_convert);
#if 0
    // Fill the output buffer with blank values
    EraseOutputBuffer(output_buffer, info->width, info->height, output_pitch, info->format);
#endif
    // Determine the type of conversion
    switch (info->format)
    {
    case DECODED_FORMAT_RGB24:
        inverted = true;
        // Fall through and convert to RGB (first image row displayed at the bottom)

    case DECODED_FORMAT_RGB24_INVERTED:
        // RGB-coded sources convert directly; YUV-coded sources go through the
        // colorspace conversion path
        if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
        {
            ConvertLowpass16sRGB48ToRGB(images, output_buffer, output_width, output_height, output_pitch,
                                        COLOR_FORMAT_RGB24, info->colorspace, inverted, descale, num_channels);
        }
        else
        {
            ConvertLowpass16sToRGBNoIPPFast(images, output_buffer, output_width, output_height, output_pitch,
                                            COLOR_FORMAT_RGB24, info->colorspace, inverted, descale);
        }
        break;

    case DECODED_FORMAT_RGB32:
        inverted = true;
        // Fall through and convert to RGB (first image row displayed at the bottom)

    case DECODED_FORMAT_RGB32_INVERTED:
        if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
        {
            ConvertLowpass16sRGB48ToRGB(images, output_buffer, output_width, output_height, output_pitch,
                                        COLOR_FORMAT_RGB32, info->colorspace, inverted, descale, num_channels);
        }
        else
        {
            ConvertLowpass16sToRGBNoIPPFast(images, output_buffer, output_width, output_height, output_pitch,
                                            COLOR_FORMAT_RGB32, info->colorspace, inverted, descale);
        }
        break;

    case DECODED_FORMAT_RG48:
        // 16-bit-per-component RGB output; the source layout decides the path
        if(encode_format == ENCODED_FORMAT_BAYER)
        {
            ConvertLowpass16sBayerToRGB48(images, output_buffer, output_width, output_height,
                                          output_pitch, 2, num_channels);
        }
        else if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
        {
            // NOTE(review): the inner test repeats the outer condition, so
            // scale is always 2 on this branch
            int scale = 1;
            if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
                scale = 2;
            ConvertLowpass16sRGB48ToRGB48(images, output_buffer, output_width, output_height,
                                          output_pitch, scale, num_channels);
        }
        else
        {
            ConvertLowpass16sYUVtoRGB48(images, (uint8_t *)output_buffer, output_width,
                                        output_height, output_pitch, info->colorspace, inverted, descale,
                                        info->format, whitebitdepth);
        }
        break;

    case DECODED_FORMAT_RG64:
        // 16-bit RGBA output is only supported from RGB(A)-coded sources
        if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
        {
            ConvertLowpass16sRGBA64ToRGBA64(images, output_buffer, output_width, output_height, output_pitch,
                                            descale, num_channels, info->format & 0xffff);
        }
        else
        {
            assert(0);
        }
        break;

    case DECODED_FORMAT_B64A:
    case DECODED_FORMAT_R210:
    case DECODED_FORMAT_DPX0:
    case DECODED_FORMAT_RG30:
    case DECODED_FORMAT_AR10:
    case DECODED_FORMAT_AB10:
        // Deep-color RGB container formats
        if(encode_format == ENCODED_FORMAT_RGB_444 || encode_format == ENCODED_FORMAT_RGBA_4444)
        {
            ConvertLowpass16sRGBA64ToRGBA64(images, output_buffer, output_width, output_height, output_pitch,
                                            descale, num_channels, info->format & 0xffff);
        }
        else
        {
            ConvertLowpass16sYUVtoRGB48(images, (uint8_t *)output_buffer, output_width,
                                        output_height, output_pitch, info->colorspace, inverted, descale,
                                        info->format, whitebitdepth);
        }
        break;

#if 0
    case DECODED_FORMAT_YUYV_INVERTED:
        inverted = true;
        // Fall through and convert to YUV (first image row displayed at the bottom)
#endif
    case DECODED_FORMAT_YUYV:
    case DECODED_FORMAT_UYVY:
        if (precision == CODEC_PRECISION_10BIT)
        {
            // 3D (stereo) decodes may interleave or stack the two channels;
            // adjust the line skip and pitch accordingly
            int lineskip = 1;       // 3D Work
            int pitch = output_pitch;

            if(decoder->channel_decodes > 1 && decoder->frame.format == DECODED_FORMAT_YUYV)
            {
                if(decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC || decoder->channel_blend_type == BLEND_LINE_INTERLEAVED) // 3d Work
                {
                    lineskip = 2;
                    // NOTE(review): magic number 3 — presumably one of the
                    // BLEND_* constants (compare with the named checks above);
                    // confirm and replace with the named value
                    if(decoder->channel_blend_type == 3)
                        pitch *= 2;
                }
            }

            if((decoder->channel_blend_type == BLEND_SIDEBYSIDE_ANAMORPHIC || decoder->channel_blend_type == BLEND_FREEVIEW) && decoder->frame.format == DECODED_FORMAT_YUYV) //side by side
            {
                SideLowpass16s10bitToYUYV(images, output_buffer, output_width, output_height, pitch, inverted);
            }
            else
            {
                //ConvertLowpass16s10bitToYUV(images, output_buffer, output_width, output_height, pitch, COLOR_FORMAT_YUYV, inverted, lineskip);
                ConvertLowpass16s10bitToYUV(images, output_buffer, output_width, output_height, pitch, color_format, inverted, lineskip);
            }
        }
        else
        {
            //ConvertLowpass16sToYUV(images, output_buffer, output_width, output_height, output_pitch, COLOR_FORMAT_YUYV, inverted);
            ConvertLowpass16sToYUV(images, output_buffer, output_width, output_height, output_pitch, color_format, inverted);
        }
        break;

#if 0
    case DECODED_FORMAT_UYVY_INVERTED:
        inverted = true;
        // Fall through and convert to YUV (first image row displayed at the bottom)
#endif

#if 0
    case DECODED_FORMAT_UYVY:
        ConvertLowpass16sToYUV(images, output_buffer, output_width, output_height, output_pitch, COLOR_FORMAT_UYVY, inverted);
        break;
#endif

//#if BUILD_PROSPECT
    case DECODED_FORMAT_V210:
        // V210 packing is only implemented for 10-bit precision
        if (precision == CODEC_PRECISION_10BIT)
        {
            ConvertLowpass16s10bitToV210(images, output_buffer, output_width, output_height, output_pitch, COLOR_FORMAT_V210, inverted);
        }
        else
        {
            //ConvertLowpass16sToV210(images, output_buffer, output_width, output_pitch, COLOR_FORMAT_V210, inverted);
            assert(0);
        }
        break;
//#endif

    case DECODED_FORMAT_YU64:
        // DAN04262004
        ConvertLowpass16sToYUV64(images, output_buffer, output_width, output_height, output_pitch, COLOR_FORMAT_YU64, inverted, precision);
        break;

//#if BUILD_PROSPECT
    case DECODED_FORMAT_YR16:
        ConvertLowpass16sToYR16(images, output_buffer, output_width, output_height, output_pitch, COLOR_FORMAT_YR16, inverted, precision);
        break;
//#endif

    default:    // Unsupported format (output a blank frame)
        assert(0);
        break;
    }

    STOP(tk_convert);
}
/*!
 * @brief Convert a strip of planar YUV rows into the caller's packed output
 * format (RGB24/RGB32 via colorspace conversion, or packed YUYV/UYVY).
 *
 * @param planar_output Array of plane base pointers (one per channel).
 * @param planar_pitch  Pitch of each plane in bytes.
 * @param roi           Width/height of the strip to convert.
 * @param output_buffer Destination for the packed pixels.
 * @param output_pitch  Destination pitch in bytes.
 * @param frame_width   Full frame width (used by the YUYV packing path).
 * @param format        Decoded output format selector.
 * @param colorspace    Colorspace used for the RGB and UYVY conversions.
 */
void ConvertYUVStripPlanarToBuffer(uint8_t *planar_output[], int planar_pitch[], ROI roi,
                                   uint8_t *output_buffer, int output_pitch, int frame_width,
                                   int format, int colorspace)
{
    bool inverted = false;
    int output_width = roi.width;

    // This routine requires the 8-bit decode path at compile time
#if !defined(_DECODE_FRAME_8U) || (_DECODE_FRAME_8U == 0)
#error Must set compile-time switches to decode to 8-bit pixels
#endif

    START(tk_convert);

#if _ENCODE_CHROMA_OFFSET
#error Cannot handle images encoded with a non-zero chroma offset
#endif

    // Determine the type of conversion
    switch(format)
    {
    case DECODED_FORMAT_RGB24:
        inverted = true;
        // Fall through and convert to RGB (first image row displayed at the bottom)

    case DECODED_FORMAT_RGB24_INVERTED:
        ConvertPlanarYUVToRGB(planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch,
                              COLOR_FORMAT_RGB24, colorspace, inverted);
        break;

    case DECODED_FORMAT_RGB32:
        inverted = true;
        // Fall through and convert to RGB (first image row displayed at the bottom)

    case DECODED_FORMAT_RGB32_INVERTED:
        ConvertPlanarYUVToRGB(planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch,
                              COLOR_FORMAT_RGB32, colorspace, inverted);
        break;

#if 0
    case DECODED_FORMAT_YUYV_INVERTED:
        inverted = true;
        // Fall through and convert to YUV (first image row displayed at the bottom)
#endif
    case DECODED_FORMAT_YUYV:
        // Note: the packed YUYV path does not take the inverted flag
        ConvertYUVStripPlanarToPacked(planar_output, planar_pitch, roi,
                                      output_buffer, output_pitch, frame_width, format);
        break;

#if 0
    case DECODED_FORMAT_UYVY_INVERTED:
        inverted = true;
        // Fall through and convert to YUV (first image row displayed at the bottom)
#endif
    case DECODED_FORMAT_UYVY:
        ConvertPlanarYUVToUYVY(planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch,
                               COLOR_FORMAT_UYVY, colorspace, inverted);
        break;

    default:    // Unsupported format (output a blank frame)
        assert(0);
        break;
    }

    STOP(tk_convert);
}
| void ConvertRow16uToDitheredBuffer(DECODER *decoder, uint8_t *planar_output[], int planar_pitch[], ROI roi, | |
| uint8_t *output_buffer, int output_pitch, int frame_width, | |
| int format, int colorspace) | |
| { | |
| bool inverted = false; | |
| int output_width = roi.width; | |
| START(tk_convert); | |
| // Determine the type of conversion | |
| switch(format) | |
| { | |
| case DECODED_FORMAT_RGB24: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB24_INVERTED: | |
| //ConvertPlanarYUVToRGB | |
| ConvertRow16uToDitheredRGB(decoder, planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch, | |
| COLOR_FORMAT_RGB24, colorspace, inverted); | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB32_INVERTED: | |
| ConvertRow16uToDitheredRGB(decoder, planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch, | |
| COLOR_FORMAT_RGB32, colorspace, inverted); | |
| break; | |
| case COLOR_FORMAT_WP13: | |
| case COLOR_FORMAT_B64A: | |
| case COLOR_FORMAT_RG48: | |
| case COLOR_FORMAT_R210: | |
| case COLOR_FORMAT_DPX0: | |
| case COLOR_FORMAT_RG30: | |
| case COLOR_FORMAT_AR10: | |
| case COLOR_FORMAT_AB10: | |
| ConvertYUVRow16uToBGRA64(planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch, format, colorspace, NULL, NULL); | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| assert(0);// These routines are not yet updated for ROW16u inputs | |
| ConvertYUVStripPlanarToPacked(planar_output, planar_pitch, roi, | |
| output_buffer, output_pitch, frame_width, format); | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| assert(0);// These routines are not yet updated for ROW16u inputs | |
| ConvertPlanarYUVToUYVY(planar_output, planar_pitch, roi, output_buffer, output_width, output_pitch, | |
| COLOR_FORMAT_UYVY, colorspace, inverted); | |
| break; | |
| default: // Unsupported format (output a blank frame) | |
| assert(0); | |
| break; | |
| } | |
| STOP(tk_convert); | |
| } | |
| // Convert one row of packed YUYV to the specified color | |
| void ConvertRowYUYV(uint8_t *input, uint8_t *output, int length, int format, int colorspace, int precision) | |
| { | |
| size_t row_size = 2 * length; | |
| bool inverted = false; | |
| START(tk_convert); | |
| // Determine the type of color conversion | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB24: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB24_INVERTED: | |
| ConvertYUYVRowToRGB(input, output, length, COLOR_FORMAT_RGB24, colorspace, precision); | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| inverted = true; | |
| // Fall through and convert to RGB (first image row displayed at the bottom) | |
| case DECODED_FORMAT_RGB32_INVERTED: | |
| ConvertYUYVRowToRGB(input, output, length, COLOR_FORMAT_RGB32, colorspace, precision); | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| if(precision == 8) | |
| memcpy(output, input, row_size); | |
| else | |
| { | |
| //need to dither to 8-bit | |
| assert(0); | |
| } | |
| break; | |
| case DECODED_FORMAT_UYVY: | |
| if(precision == 8) | |
| ConvertYUYVRowToUYVY(input, output, length, COLOR_FORMAT_UYVY); | |
| else | |
| { | |
| //need to dither to 8-bit | |
| assert(0); | |
| } | |
| break; | |
| //#if BUILD_PROSPECT | |
| case DECODED_FORMAT_V210: | |
| assert(0); // should get here with 8bit data. | |
| //ConvertYUYVRowToV210(input, output, length, COLOR_FORMAT_V210); | |
| break; | |
| case DECODED_FORMAT_YU64: | |
| assert(0); // should get here with 8bit data. | |
| //ConvertYUYVRowToYU64(input, output, length, COLOR_FORMAT_YU64); | |
| break; | |
| case DECODED_FORMAT_BYR3: | |
| case DECODED_FORMAT_BYR4: | |
| assert(0); // should get here with 8bit data. | |
| //ConvertYUYVRowToYU64(input, output, length, COLOR_FORMAT_YU64); | |
| break; | |
| //#endif | |
| default: // Unsupported format (output a blank frame) | |
| assert(0); | |
| memset(output, 0, row_size); | |
| break; | |
| } | |
| STOP(tk_convert); | |
| } | |
| #if _THREADED_DECODER | |
| IMAGE *GetWaveletThreadSafe(DECODER *decoder, TRANSFORM *transform, int index, | |
| int width, int height, int level, int type) | |
| { | |
| IMAGE *wavelet = transform->wavelet[index]; | |
| assert(decoder != NULL && transform != NULL); | |
| if (decoder != NULL && transform != NULL) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| // Lock access to the wavelet data | |
| #if _DELAYED_THREAD_START==0 | |
| Lock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| // Get the wavelet from the transform data structure (thread safe) | |
| wavelet = transform->wavelet[index]; | |
| // Allocate (or reallocate) the wavelet | |
| #if _ALLOCATOR | |
| wavelet = ReallocWaveletEx(decoder->allocator, wavelet, width, height, level, type); | |
| #else | |
| wavelet = ReallocWaveletEx(wavelet, width, height, level, type); | |
| #endif | |
| // Save this wavelet in the transform data structure | |
| transform->wavelet[index] = wavelet; | |
| // Unlock access to the wavelet data | |
| #if _DELAYED_THREAD_START==0 | |
| Unlock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| } | |
| return wavelet; | |
| } | |
// Update the codec state with the information in a tag value pair
//
// Called once for each tag-value pair as the sample header is parsed.  Most
// tags simply record their value in the codec state; a few tags also trigger
// decoding of the data that follows the tag in the bitstream (channel
// headers, subbands, lowpass band markers) or skip over chunk payloads.
//
// decoder - decoder instance (transforms, colorspace overrides, logfile)
// input   - bitstream positioned just after the tag value pair
// codec   - codec state being populated from the header
// tag     - tag from the bitstream; negative means the tag is optional
// value   - value paired with the tag
//
// Returns CODEC_ERROR_OKAY on success or a specific codec error code.
CODEC_ERROR UpdateCodecState(DECODER *decoder, BITSTREAM *input, CODEC_STATE *codec, TAGWORD tag, TAGWORD value)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	bool optional = false;
	int chunksize = 0;
	bool result;

	// Is this an optional tag?  Optional tags are encoded as negative values.
	if (tag < 0) {
		tag = NEG(tag);
		optional = true;
	}

#if (0 && DEBUG)
	if (logfile) {
		fprintf(logfile, "UpdateCodecState tag: %d, value: %d, optional: %d\n",
			tag, value, optional);
	}
#endif

	switch (tag)
	{
	case CODEC_TAG_ZERO:				// Used internally
		assert(0);						// Should not occur in the bitstream
		error = CODEC_ERROR_INVALID_BITSTREAM;
		break;

	case CODEC_TAG_SAMPLE:				// Type of sample
		//assert(0);
		if (value == SAMPLE_TYPE_CHANNEL)
		{
			// Decode the channel header that follows this tag
			result = DecodeSampleChannelHeader(decoder, input);
			if (!result)
				error = CODEC_ERROR_DECODE_SAMPLE_CHANNEL_HEADER;
			else
				error = CODEC_ERROR_OKAY;
		}
		break;

	case CODEC_TAG_INDEX:				// Sample index table
		//assert(0);	// Need to figure out how to return the group index
		{
			int count = value;
			// The channel size table in the codec state doubles as the index storage
			uint32_t *index = (uint32_t *)(&codec->channel_size[0]);
			DecodeGroupIndex(input, index, count);
			codec->num_channels = count;
		}
		break;

	case CODEC_TAG_SUBBAND:		// Has the decoder encountered a subband?
		{						// This tag is obsolete and not used in modern streams
			int subband = value;

			// Check that the subband number makes sense
			assert(0 <= subband && subband <= codec->max_subband);
			if (! (0 <= subband && subband <= codec->max_subband))
			{
				error = CODEC_ERROR_DECODING_SUBBAND;
				break;
			}

			// Decompress the subband
			result = DecodeSampleSubband(decoder, input, subband);
			if (!result)
				error = CODEC_ERROR_DECODING_SUBBAND;
			else
				error = CODEC_ERROR_OKAY;
		}
		break;

	case CODEC_TAG_BAND_HEADER:		//CODEC_TAG_BAND_DIVISOR: // Band divisor. this is last TAG before subband data so act.
		codec->band.divisor = value;	// This tag value pair encodes the band divisor which is obsolete
		{
			// This tag value pair marks the beginning of the encoded coefficients
			// The subband number has already been decoded
			int subband = codec->band.subband;
			result = DecodeSampleSubband(decoder, input, subband);
			if (!result)
				error = CODEC_ERROR_DECODING_SUBBAND;
			else
				error = CODEC_ERROR_OKAY;
		}
		break;

	case CODEC_TAG_ENTRY:			// Entry in sample index
		assert(0);					// Need to figure out how to return the group index
		break;

	case CODEC_TAG_MARKER:			// Bitstream marker
		{
			int marker = value;
			uint8_t *current_position;

			// Save the current bitstream position
			current_position = GetBitstreamPosition(input);
			current_position -= 4;	// Step back to before the GetSegment i.e. the TAG

			if (IsLowPassHeaderMarker(marker))
			{
				// Save the bitstream position for the start of the channel
				codec->channel_position = current_position;
			}
			else if (IsLowPassBandMarker(marker))
			{
				// The lowpass band is always subband zero
				int subband = 0;
				result = DecodeSampleSubband(decoder, input, subband);
				if (!result)
					error = CODEC_ERROR_DECODING_SUBBAND;
				else
					error = CODEC_ERROR_OKAY;
			}
		}
		break;

	case CODEC_TAG_VERSION_MAJOR:		// Version
		assert(0);
		break;

	case CODEC_TAG_VERSION_MINOR:		// Minor version number
		assert(0);
		break;

	case CODEC_TAG_VERSION_REVISION:	// Revision number
		assert(0);
		break;

	case CODEC_TAG_VERSION_EDIT:		// Edit number
		assert(0);
		break;

	case CODEC_TAG_SEQUENCE_FLAGS:		// Video sequence flags
		assert(0);
		break;

	case CODEC_TAG_TRANSFORM_TYPE:		// Type of transform
		assert(TRANSFORM_TYPE_FIRST <= value && value <= TRANSFORM_TYPE_LAST);
		if (TRANSFORM_TYPE_FIRST <= value && value <= TRANSFORM_TYPE_LAST)
		{
			int i;
			codec->transform_type = value;

			// The prescale table depends on the transform type and precision,
			// so refresh it in every allocated channel transform
			for(i=0;i<TRANSFORM_MAX_CHANNELS;i++)
			{
				TRANSFORM *transform = decoder->transform[i];
				if(transform)
				{
					GetTransformPrescale(transform, codec->transform_type, codec->precision);
				}
			}
		}
		else
			error = CODEC_ERROR_TRANSFORM_TYPE;
		break;

	case CODEC_TAG_NUM_FRAMES:			// Number of frames in the group
		assert(0 <= value && value <= TRANSFORM_NUM_FRAMES);
		if (0 <= value && value <= TRANSFORM_NUM_FRAMES)
			codec->num_frames = value;
		else
			error = CODEC_ERROR_NUM_FRAMES;
		break;

	case CODEC_TAG_NUM_CHANNELS:		// Number of channels in the transform
		assert(value <= CODEC_MAX_CHANNELS);
		if (value <= CODEC_MAX_CHANNELS)
			codec->num_channels = value;
		else
			error = CODEC_ERROR_NUM_CHANNELS;
		break;

	case CODEC_TAG_NUM_WAVELETS:		// Number of wavelets in the transform
		assert(0 < value && value <= TRANSFORM_NUM_WAVELETS);
		if (0 < value && value <= TRANSFORM_NUM_WAVELETS)
			codec->num_wavelets = value;
		else
			error = CODEC_ERROR_NUM_WAVELETS;
		break;

	case CODEC_TAG_NUM_SUBBANDS:		// Number of encoded subbands
		assert(0 < value && value <= TRANSFORM_NUM_SUBBANDS);
		if (0 < value && value <= TRANSFORM_NUM_SUBBANDS)
			codec->num_subbands = value;
		else
			error = CODEC_ERROR_NUM_SUBBANDS;
		break;

	case CODEC_TAG_NUM_SPATIAL:			// Number of spatial levels
		assert(0 < value && value <= TRANSFORM_NUM_SPATIAL);
		if (0 < value && value <= TRANSFORM_NUM_SPATIAL)
			codec->num_spatial = value;
		else
			error = CODEC_ERROR_NUM_SPATIAL;
		break;

	case CODEC_TAG_FIRST_WAVELET:		// Type of the first wavelet
		assert(value == TRANSFORM_FIRST_WAVELET);
		if (value == TRANSFORM_FIRST_WAVELET)
			codec->first_wavelet = value;
		else
			error = CODEC_ERROR_FIRST_WAVELET;
		break;

	case CODEC_TAG_CHANNEL_SIZE:		// Number of bytes in each channel
		assert(0);
		break;

	case CODEC_TAG_GROUP_TRAILER:		// Group trailer and checksum
		codec->sample_done = true;
		break;

	case CODEC_TAG_FRAME_TYPE:			// Type of frame marks the frame start
		codec->frame.type = value;
		break;

	case CODEC_TAG_FRAME_WIDTH:			// Width of the frame
		codec->frame.width = value;
		break;

	case CODEC_TAG_FRAME_HEIGHT:		// Height of the frame
		codec->frame.height = value;

		//DAN20080729 -- Initialize the default colorspace based on clip resolution
		if ((decoder->frame.colorspace & COLORSPACE_MASK) == COLOR_SPACE_UNDEFINED)
		{
			int internalheight = value;
			int internalwidth = codec->frame.width;

			// Bayer frames are encoded at half resolution in each dimension
			if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER)
			{
				internalwidth *= 2;
				internalheight *= 2;
			}

			// Anything larger than SD defaults to 709, otherwise 601
			if(internalheight > 576 || internalwidth > 720)
				decoder->frame.colorspace |= COLOR_SPACE_CG_709;
			else
				decoder->frame.colorspace |= COLOR_SPACE_CG_601;
		}

		//if(decoder->frame.colorspace_filedefault)
		//	decoder->frame.colorspace = decoder->frame.colorspace_filedefault;

		// An application-supplied colorspace override always wins
		if(decoder->frame.colorspace_override)
			decoder->frame.colorspace = decoder->frame.colorspace_override;
		break;

	case CODEC_TAG_ENCODED_COLORSPACE:	//DAN20080729
		if(decoder->codec.encoded_format == ENCODED_FORMAT_BAYER)
			value &= ~(COLOR_SPACE_BT_601|COLOR_SPACE_BT_709);	// Bayer has no 601 vs 709,
											//there was a bug in 3.9.4 that had bayer flagged as 601.

		if(decoder->frame.colorspace_override)
			decoder->frame.colorspace = decoder->frame.colorspace_override;
		else
		{
			if(decoder->codec.encoded_format == ENCODED_FORMAT_YUV_422)
			{
				// Take only the 601/709 bits from the stream
				decoder->frame.colorspace &= ~(COLOR_SPACE_BT_601 | COLOR_SPACE_BT_709);
				decoder->frame.colorspace |= (value & (COLOR_SPACE_BT_601 | COLOR_SPACE_BT_709));
				//Let the VSRGB status be controllable by the calling application (e.g. Vegas)
			}
			else
			{
				// Take only the video-safe RGB bit from the stream
				decoder->frame.colorspace &= ~(COLOR_SPACE_VS_RGB);
				decoder->frame.colorspace |= (value & (COLOR_SPACE_VS_RGB));
			}
		}
		decoder->frame.colorspace_filedefault = value;
		break;

	case CODEC_TAG_FRAME_FORMAT:		// Format of the encoded pixels (GRAY, YUV, RGB, RGBA)
		assert(0);
		break;

	case CODEC_TAG_INPUT_FORMAT:		// Format of the original pixels
		codec->input_format = value;
		// Set the encoded format if it has not already been set
		// error = UpdateEncodedFormat(codec, (COLOR_FORMAT)value);
		break;

	case CODEC_TAG_ENCODED_FORMAT:		// Internal format of the encoded data
	case CODEC_TAG_OLD_ENCODED_FORMAT:
		codec->encoded_format = value;
		// A three-channel RGBA stream is really RGB 4:4:4
		if(codec->encoded_format == ENCODED_FORMAT_RGBA_4444 && codec->num_channels == 3)
			codec->encoded_format = ENCODED_FORMAT_RGB_444;
		break;

	case CODEC_TAG_FRAME_INDEX:			// Position of frame within the group
		codec->frame.group_index = value;
		break;

	case CODEC_TAG_FRAME_TRAILER:		// Frame trailer and checksum
		codec->sample_done = true;
		break;

	case CODEC_TAG_LOWPASS_SUBBAND:		// Subband number of the lowpass band
		codec->lowpass.subband = value;
		error = SetDefaultEncodedFormat(codec);
		break;

	case CODEC_TAG_NUM_LEVELS:			// Number of wavelet levels
		codec->lowpass.level = value;
		break;

	case CODEC_TAG_LOWPASS_WIDTH:		// Width of the lowpass band
		codec->lowpass.width = value;
		break;

	case CODEC_TAG_LOWPASS_HEIGHT:		// Height of the lowpass band
		codec->lowpass.height = value;
		break;

	case CODEC_TAG_MARGIN_TOP:			// Margins that define the encoded subset
		codec->lowpass.margin.top = value;
		break;

	case CODEC_TAG_MARGIN_BOTTOM:
		codec->lowpass.margin.bottom = value;
		break;

	case CODEC_TAG_MARGIN_LEFT:
		codec->lowpass.margin.left = value;
		break;

	case CODEC_TAG_MARGIN_RIGHT:
		codec->lowpass.margin.right = value;
		break;

	case CODEC_TAG_PIXEL_OFFSET:		// Quantization parameters
		codec->lowpass.pixel_offset = value;
		break;

	case CODEC_TAG_QUANTIZATION:		// Quantization divisor used during encoding
		codec->lowpass.quantization = value;
		break;

	case CODEC_TAG_PIXEL_DEPTH:			// Number of bits per pixel
		codec->lowpass.bits_per_pixel = value;
		break;

	case CODEC_TAG_LOWPASS_TRAILER:		// Lowpass trailer
		assert(0);
		break;

	case CODEC_TAG_WAVELET_TYPE:		// Type of wavelet
		codec->highpass.wavelet_type = value;
		break;

	case CODEC_TAG_WAVELET_NUMBER:		// Number of the wavelet in the transform
		codec->highpass.wavelet_number = value;
		break;

	case CODEC_TAG_WAVELET_LEVEL:		// Level of the wavelet in the transform
		codec->highpass.wavelet_level = value;
		break;

	case CODEC_TAG_NUM_BANDS:			// Number of wavelet bands
		codec->highpass.num_bands = value;
		break;

	case CODEC_TAG_HIGHPASS_WIDTH:		// Width of each highpass band
		codec->highpass.width = value;
		break;

	case CODEC_TAG_HIGHPASS_HEIGHT:		// Height of each highpass band
		codec->highpass.height = value;
		break;

	case CODEC_TAG_LOWPASS_BORDER:		// Dimensions of lowpass border (obsolete)
		codec->highpass.lowpass_border = value;
		break;

	case CODEC_TAG_HIGHPASS_BORDER:		// Dimensions of highpass border (obsolete)
		codec->highpass.highpass_border = value;
		break;

	case CODEC_TAG_LOWPASS_SCALE:		// Scale factor for lowpass band
		codec->highpass.lowpass_scale = value;
		break;

	case CODEC_TAG_LOWPASS_DIVISOR:		// Divisor for the lowpass band
		codec->highpass.lowpass_divisor = value;
		break;

	case CODEC_TAG_HIGHPASS_TRAILER:	// Highpass trailer
		assert(0);
		break;

	case CODEC_TAG_BAND_NUMBER:			// Identifying number of a wavelet band
		codec->band.number = value;
		break;

	case CODEC_TAG_BAND_WIDTH:			// Band data width
		codec->band.width = value;
		break;

	case CODEC_TAG_BAND_HEIGHT:			// Band data height
		codec->band.height = value;
		break;

	case CODEC_TAG_BAND_SUBBAND:		// Subband number of this wavelet band
		codec->band.subband = value;
		//assert(value != 255);
		break;

	case CODEC_TAG_BAND_ENCODING:		// Encoding method for this band
		codec->band.encoding = value;
		break;

	case CODEC_TAG_BAND_QUANTIZATION:	// Quantization applied to band
		codec->band.quantization = value;
		break;

	case CODEC_TAG_BAND_SCALE:			// Band scale factor
		codec->band.scale = value;
		break;

	case CODEC_TAG_BAND_TRAILER:		// Band trailer
		assert(0);
		break;

	case CODEC_TAG_NUM_ZEROVALUES:		// Number of zero values
		assert(0);
		break;

	case CODEC_TAG_NUM_ZEROTREES:		// Number of zerotrees
		assert(0);
		break;

	case CODEC_TAG_NUM_POSITIVES:		// Number of positive values
		assert(0);
		break;

	case CODEC_TAG_NUM_NEGATIVES:		// Number of negative values
		assert(0);
		break;

	case CODEC_TAG_NUM_ZERONODES:		// Number of zerotree nodes
		assert(0);
		break;

	case CODEC_TAG_CHANNEL:				// Channel number
		assert(0);
		break;

	case CODEC_TAG_INTERLACED_FLAGS:	// Interlaced structure of the video stream
		//assert(0);
		break;
		//assert(0);

	case CODEC_TAG_PROTECTION_FLAGS:	// Copy protection bits
		//assert(0);
		break;

	case CODEC_TAG_PICTURE_ASPECT_X:	// Numerator of the picture aspect ratio
		codec->picture_aspect_x = value;
		//assert(0);
		break;

	case CODEC_TAG_PICTURE_ASPECT_Y:	// Denominator of the picture aspect ratio
		codec->picture_aspect_y = value;
		//assert(0);
		break;

	case CODEC_TAG_SAMPLE_FLAGS:		// Flag bits that control sample decoding
		// Progressive versus interlaced decoding is specified by the sample flags
		error = UpdateCodecFlags(codec, value);
		break;

	case CODEC_TAG_FRAME_NUMBER:		// Sequence number of the frame in the bitstream
		codec->frame_number = value;
		break;

	// This TAG is now support as part of the universal decoder.
	// Only Prospect HD builds can decode 10bit.
	case CODEC_TAG_PRECISION:			// Number of bits in the video source
		codec->precision = value;
		{
			int i;
			// The prescale table depends on the precision, so refresh it
			// in every allocated channel transform
			for(i=0;i<TRANSFORM_MAX_CHANNELS;i++)
			{
				TRANSFORM *transform = decoder->transform[i];
				if(transform)
				{
					GetTransformPrescale(transform, codec->transform_type, codec->precision);
				}
			}
		}
		break;

	case CODEC_TAG_PRESCALE_TABLE:
		{
			int i;
			int prescale[TRANSFORM_MAX_WAVELETS] = {0};

			// The table is packed two bits per wavelet, highest wavelet first
			for(i=0;i<TRANSFORM_MAX_WAVELETS;i++)
				prescale[i] = value >> (14-i*2) & 0x3;

			for(i=0;i<TRANSFORM_MAX_CHANNELS;i++)
			{
				TRANSFORM *transform = decoder->transform[i];
				if(transform)
				{
					memcpy(transform->prescale, prescale, sizeof(prescale));
				}
			}
		}
		break;

	case CODEC_TAG_VERSION:				// Version number of the encoder used in each GOP.
		// Packed as major (4 bits), minor (4 bits), revision (8 bits)
		codec->version[0] = (value>>12) & 0xf;
		codec->version[1] = (value>>8) & 0xf;
		codec->version[2] = value & 0xff;
		break;

	case CODEC_TAG_QUALITY_L:			// Low 16 bits of the encode quality
		codec->encode_quality &= 0xffff0000;
		codec->encode_quality |= value;
		break;

	case CODEC_TAG_QUALITY_H:			// High 16 bits of the encode quality
		codec->encode_quality &= 0xffff;
		codec->encode_quality |= value<<16;
		break;

	case CODEC_TAG_BAND_CODING_FLAGS:
		codec->active_codebook = value & 0xf;	// 0-15 valid code books
		codec->difference_coding = (value>>4) & 1;
		break;

	// Peak table processing
	case CODEC_TAG_PEAK_TABLE_OFFSET_L:
		// Low 16 bits of the byte offset to the peak table
		codec->peak_table.offset &= ~0xffff;
		codec->peak_table.offset |= (value & 0xffff);
		codec->peak_table.base = (PIXEL *)(input->lpCurrentWord);
		codec->peak_table.level = 0;	// reset for the next subband
		break;

	case CODEC_TAG_PEAK_TABLE_OFFSET_H:
		// High 16 bits of the byte offset to the peak table
		codec->peak_table.offset &= 0xffff;
		codec->peak_table.offset |= (value & 0xffff)<<16;
		codec->peak_table.level = 0;	// reset for the next subband
		break;

	case CODEC_TAG_PEAK_LEVEL:
		codec->peak_table.level = value;
		codec->peak_table.base += codec->peak_table.offset / sizeof(PIXEL);
		break;

	case CODEC_TAG_PEAK_TABLE:
		//this is the chunk header, so we have peak data
		codec->peak_table.level = 0;	// reset for the next subband

		//Just skip as the data was read ahead
		chunksize = value;
		chunksize &= 0xffff;
		input->lpCurrentWord += chunksize*4;
		input->nWordsUsed -= chunksize*4;
		break;

#if (1 && DEBUG)
	case CODEC_TAG_SAMPLE_END:			// Marks the end of the sample (for debugging only)
		assert(0);
		break;
#endif

	default:	// Unknown tag
		// Unknown tags with a size field describe chunks that can be skipped
		if(tag & 0x4000)
		{
			if(tag & 0x2000)	// i.e. 0x6xxx = 24bit size.
			{
				chunksize = value;
				chunksize &= 0xffff;
				chunksize += ((tag&0xff)<<16);
			}
			else	// 16bit size
			{
				chunksize = value;
				chunksize &= 0xffff;
			}
		}
		else if(tag & 0x2000)	//24bit LONGs chunk size
		{
			optional = true;	// Fixes a weird scenario where the size fields in SizeTagPop() has not
								// updated the size and turned the tag to optional.  TODO : WHY
			chunksize = 0;		// do not skip
			// chunksize = value + ((tag & 0xff)<<16);
			// do not skip an unknown but optional chunk
			// These are only use to size subbands, but the data within should not be skipped
			// unless
			if((tag & 0xff00) == CODEC_TAG_UNCOMPRESS)
			{
				// Record the location and size of the uncompressed sample data
				optional = true;
				chunksize = value;
				chunksize &= 0xffff;
				chunksize += ((tag&0xff)<<16);
				decoder->uncompressed_chunk = (uint32_t *)input->lpCurrentWord;
				decoder->uncompressed_size = chunksize*4;
				decoder->sample_uncompressed = 1;
			}
		}

		// An unknown tag that is not optional is a bitstream error
		assert(optional);
		if(!optional)
		{
			error = CODEC_ERROR_UNKNOWN_REQUIRED_TAG;
		}
		else if(chunksize > 0)	// skip this option chunk
		{
			input->lpCurrentWord += chunksize*4;
			input->nWordsUsed -= chunksize*4;
		}
		break;
	}

	return error;
}
| void UpdateWaveletBandValidFlags(DECODER *decoder, IMAGE *wavelet, int band) | |
| { | |
| assert(decoder != NULL); | |
| assert(wavelet != NULL); | |
| if (decoder != NULL && wavelet != NULL) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| #if _THREADED_DECODER | |
| // Lock access to the wavelet data | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| Lock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Changing band valid flags: 0x%04X, mask: 0x%04X\n", | |
| wavelet->band_valid_flags, BAND_VALID_MASK(band)); | |
| } | |
| #endif | |
| // Update the wavelet band flags | |
| wavelet->band_valid_flags |= BAND_VALID_MASK(band); | |
| wavelet->band_started_flags |= BAND_VALID_MASK(band); | |
| #if _THREADED_DECODER | |
| // Unlock access to the wavelet data | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| Unlock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| } | |
| } | |
| void UpdateWaveletBandStartedFlags(DECODER *decoder, IMAGE *wavelet, int band) | |
| { | |
| assert(decoder != NULL); | |
| assert(wavelet != NULL); | |
| if (decoder != NULL && wavelet != NULL) | |
| { | |
| // Update the wavelet band flags | |
| #if _DELAYED_THREAD_START==0 | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| Lock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| wavelet->band_started_flags |= BAND_VALID_MASK(band); | |
| #if _DELAYED_THREAD_START==0 | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| Unlock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| } | |
| } | |
| bool DecodedBandsValid(IMAGE *wavelet, int index, int transform_type) | |
| { | |
| uint32_t threaded_band_mask; | |
| uint32_t wavelet_band_mask; | |
| uint32_t decoded_band_mask; | |
| bool decoded_bands_valid; | |
| // Has this wavelet been created? | |
| if (wavelet == NULL) | |
| { | |
| // Too soon to wait for the wavelet bands to be decoded | |
| return false; | |
| } | |
| // Is this a fieldplus transform? | |
| if (transform_type == TRANSFORM_TYPE_FIELDPLUS) | |
| { | |
| // Is this the temporal wavelet? | |
| if (index == 2) | |
| { | |
| assert(wavelet->wavelet_type == WAVELET_TYPE_TEMPORAL); | |
| assert(wavelet->num_bands == 2); | |
| // Earlier transforms in the queue will compute both wavelet bands | |
| return true; | |
| } | |
| // Is this wavelet at the end of a chain of transforms? | |
| if (index == 3 || index == 5) | |
| { | |
| // Must wait for all bands to be decoded | |
| threaded_band_mask = 0; | |
| } | |
| else | |
| { | |
| // The lowpass band will be computed by transforms earlier in the queue | |
| threaded_band_mask = BAND_VALID_MASK(0); | |
| } | |
| } | |
| // Is this a spatial transform? | |
| else if (transform_type == TRANSFORM_TYPE_SPATIAL) | |
| { | |
| // Is this wavelet at the top of the pyramid? | |
| if (index == 2) | |
| { | |
| // Must wait for all bands to be decoded | |
| threaded_band_mask = 0; | |
| } | |
| #if 0 | |
| // Is this wavelet at the bottom of the pyramid? | |
| else if (index == 0) | |
| { | |
| // Must wait for all bands to be decoded | |
| threaded_band_mask = 0; | |
| } | |
| #endif | |
| else | |
| { | |
| // The lowpass band will be computed by transforms earlier in the queue | |
| threaded_band_mask = BAND_VALID_MASK(0); | |
| } | |
| } | |
| else | |
| { | |
| // Unknown type of transform | |
| assert(0); | |
| // Assume that the bands are not valid | |
| return false; | |
| } | |
| // Compute the mask for the bands in this wavelet | |
| decoded_band_mask = ((1 << wavelet->num_bands) - 1); | |
| // Clear the bit for the band computed by the threaded transform | |
| decoded_band_mask &= ~threaded_band_mask; | |
| // Compute the wavelet bands that have been decoded | |
| wavelet_band_mask = (wavelet->band_valid_flags & decoded_band_mask); | |
| // Have all of the bands not computed by the transform thread been decoded? | |
| decoded_bands_valid = (wavelet_band_mask == decoded_band_mask); | |
| return decoded_bands_valid; | |
| } | |
| void QueueThreadedTransform(DECODER *decoder, int channel, int index) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| TRANSFORM *transform = decoder->transform[channel]; | |
| //IMAGE *wavelet = transform->wavelet[index]; | |
| int precision = codec->precision; | |
| // The transform data structure must exist | |
| assert(transform != NULL); | |
| // The transform thread variables should have been created | |
| { | |
| int free_entry; | |
| #if _DELAYED_THREAD_START==0 | |
| // Lock access to the transform queue | |
| Lock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| // Copy the transform parameters into the next queue entry | |
| free_entry = decoder->transform_queue.free_entry; | |
| assert(0 <= free_entry && free_entry < DECODING_QUEUE_LENGTH); | |
| if (0 <= free_entry && free_entry < DECODING_QUEUE_LENGTH) | |
| { | |
| assert(transform != NULL); | |
| assert(0 <= channel && channel < TRANSFORM_MAX_CHANNELS); | |
| assert(0 <= index && index < TRANSFORM_MAX_WAVELETS); | |
| // Note: The wavelet may not exist when the transform is queued | |
| decoder->transform_queue.queue[free_entry].transform = transform; | |
| decoder->transform_queue.queue[free_entry].channel = channel; | |
| decoder->transform_queue.queue[free_entry].index = index; | |
| decoder->transform_queue.queue[free_entry].precision = precision; | |
| decoder->transform_queue.queue[free_entry].done = 0; | |
| // Update the transform request queue | |
| decoder->transform_queue.free_entry++; | |
| decoder->transform_queue.num_entries++; | |
| #if (1 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Queued transform, channel: %d, index: %d\n", channel, index); | |
| } | |
| #endif | |
| } | |
| #if _DELAYED_THREAD_START==0 | |
| Unlock(&decoder->entropy_worker_new.lock); | |
| #endif | |
| } | |
| } | |
| #if _THREADED_DECODER | |
| void WaitForTransformThread(DECODER *decoder) | |
| { | |
| if(decoder->entropy_worker_new.pool.thread_count) | |
| { | |
| #if _DELAYED_THREAD_START | |
| ThreadPoolSendMessage(&decoder->entropy_worker_new.pool, THREAD_MESSAGE_START); | |
| #endif | |
| ThreadPoolWaitAllDone(&decoder->entropy_worker_new.pool); | |
| decoder->transform_queue.started = 0; | |
| decoder->transform_queue.num_entries = 0; | |
| decoder->transform_queue.next_entry = 0; | |
| decoder->transform_queue.free_entry = 0; | |
| } | |
| } | |
| #endif | |
| #endif | |
| #if _INTERLACED_WORKER_THREADS | |
// Run the inverse frame transform for all channels on the interlaced worker
// threads, producing packed YUV output.
//
// The parameters are posted to the mailbox shared with the worker threads,
// the row semaphore is primed with one count per transform row, both workers
// are woken, and this thread blocks until both signal completion.  The
// posting order matters: the mailbox must be complete before the semaphore
// is released and the start events are set.
void TransformInverseFrameThreadedToYUV(DECODER *decoder, int frame_index, int num_channels,
										uint8_t *output, int pitch, FRAME_INFO *info,
										int chroma_offset, int precision)
{
	int32_t lPreviousCount,i;

	// There are half as many input rows as output rows
	// (the output height is rounded up to a multiple of eight before halving)
	int transform_height = (((info->height+7)/8)*8) / 2;
	int middle_row_count = transform_height;

	// Post a message to the mailbox
	struct interlace_data *mailbox = &decoder->interlaced_worker.interlace_data;
	mailbox->type = THREAD_TRANSFORM_FRAME_YUV;
	mailbox->frame = frame_index;
	mailbox->num_channels = num_channels;
	mailbox->output = output;
	mailbox->pitch = pitch;
	memcpy(&mailbox->info, info, sizeof(FRAME_INFO));
	mailbox->chroma_offset = chroma_offset;
	mailbox->precision = precision;

	// Set the semaphore to the number of rows
	decoder->interlaced_worker.current_row = 0;
	ReleaseSemaphore(decoder->interlaced_worker.row_semaphore, middle_row_count, &lPreviousCount);
	assert(lPreviousCount == 0);	// The semaphore should have been drained by the previous frame

	// Wake up both worker threads
	for(i=0; i<THREADS_IN_LAST_WAVELET; i++)
	{
		SetEvent(decoder->interlaced_worker.start_event[i]);
	}

	// Wait for both worker threads to finish
	WaitForMultipleObjects(THREADS_IN_LAST_WAVELET, decoder->interlaced_worker.done_event, true, INFINITE);
}
| void TransformInverseFrameThreadedToRow16u(DECODER *decoder, int frame_index, int num_channels, | |
| PIXEL16U *output, int pitch, FRAME_INFO *info, | |
| int chroma_offset, int precision) | |
| { | |
| int32_t lPreviousCount,i; | |
| // There are half as many input rows as output rows | |
| int transform_height = (((info->height+7)/8)*8) / 2; | |
| int middle_row_count = transform_height; | |
| // Post a message to the mailbox | |
| struct interlace_data *mailbox = &decoder->interlaced_worker.interlace_data; | |
| mailbox->type = THREAD_TRANSFORM_FRAME_ROW16U; | |
| mailbox->frame = frame_index; | |
| mailbox->num_channels = num_channels; | |
| mailbox->output = (uint8_t *)output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->chroma_offset = chroma_offset; | |
| mailbox->precision = precision; | |
| // Set the semaphore to the number of rows | |
| decoder->interlaced_worker.current_row = 0; | |
| ReleaseSemaphore(decoder->interlaced_worker.row_semaphore, middle_row_count, &lPreviousCount); | |
| assert(lPreviousCount == 0); | |
| // Wake up both worker threads | |
| for(i=0; i<THREADS_IN_LAST_WAVELET; i++) | |
| { | |
| SetEvent(decoder->interlaced_worker.start_event[i]); | |
| } | |
| // Wait for both worker threads to finish | |
| WaitForMultipleObjects(THREADS_IN_LAST_WAVELET, decoder->interlaced_worker.done_event, true, INFINITE); | |
| } | |
// Worker thread procedure for the interlaced inverse transform.
// Each thread claims a unique index on startup, then loops waiting for either
// its start event (process the transform described in the shared mailbox) or
// the shared stop event (terminate).  Returns 0 on normal shutdown, 1 if the
// required synchronization objects were never created.
DWORD WINAPI InterlacedWorkerThreadProc(LPVOID lpParam)
{
	DECODER *decoder = (DECODER *)lpParam;
	FILE *logfile = decoder->logfile;	// NOTE(review): unused in this routine
	struct interlace_data *data = &decoder->interlaced_worker.interlace_data;
	int thread_index;
	HANDLE hObjects[2];
	DWORD dwReturnValue;
	// Pin this thread to the requested processors if an affinity mask was set
	if(decoder->thread_cntrl.affinity)
	{
		HANDLE hCurrentThread = GetCurrentThread();
		SetThreadAffinityMask(hCurrentThread,decoder->thread_cntrl.affinity);
	}
	// Set the handler for system exceptions
#ifdef _WIN32
	SetDefaultExceptionHandler();
#endif
	// Determine the index of this worker thread
	// (the shared counter increment is guarded by the critical section when one exists)
	if(decoder->interlaced_worker.lock_init)
	{
		EnterCriticalSection(&decoder->interlaced_worker.lock);
	}
	thread_index = decoder->interlaced_worker.thread_count++;
	if(decoder->interlaced_worker.lock_init)
		LeaveCriticalSection(&decoder->interlaced_worker.lock);
	// The transform worker variables should have been created
	assert(decoder->interlaced_worker.start_event[thread_index] != NULL);
	assert(decoder->interlaced_worker.row_semaphore != NULL);
	assert(decoder->interlaced_worker.done_event[thread_index] != NULL);
	assert(decoder->interlaced_worker.stop_event != NULL);
	// Release builds: bail out instead of using a NULL handle
	if (!(decoder->interlaced_worker.start_event[thread_index] != NULL &&
		decoder->interlaced_worker.row_semaphore != NULL &&
		decoder->interlaced_worker.done_event[thread_index] != NULL &&
		decoder->interlaced_worker.stop_event != NULL)) {
		return 1;
	}
	// Wait on this thread's start event and the shared stop event
	hObjects[0] = decoder->interlaced_worker.start_event[thread_index];
	hObjects[1] = decoder->interlaced_worker.stop_event;
	for (;;)
	{
		// Wait for the signal to begin processing a transform
		dwReturnValue = WaitForMultipleObjects(2, hObjects, false, INFINITE);
		// Received a signal to begin inverse transform processing?
		if (dwReturnValue == WAIT_OBJECT_0)
		{
			int type;			// Type of inverse transform to perform
			int frame_index;	// Index of output frame to produce
			int num_channels;	// Number of channels in the transform array
			uint8_t *output;	// Output frame buffer
			int pitch;			// Output frame pitch
			FRAME_INFO info;	// Format of the output frame
			int chroma_offset;	// Offset for the output chroma
			int precision;		// Source pixel bit depth
			// Lock access to the transform data
			if(decoder->interlaced_worker.lock_init) {
				EnterCriticalSection(&decoder->interlaced_worker.lock);
			}
			// Get the processing parameters (copied locally so the mailbox can be reused)
			type = data->type;
			frame_index = data->frame;
			num_channels = data->num_channels;
			output = data->output;
			pitch = data->pitch;
			memcpy(&info, &data->info, sizeof(FRAME_INFO));
			chroma_offset = data->chroma_offset;
			precision = data->precision;
			// Unlock access to the transform data
			if(decoder->interlaced_worker.lock_init)
				LeaveCriticalSection(&decoder->interlaced_worker.lock);
			// Select the type of inverse transform to perform
			switch (type)
			{
			case THREAD_TRANSFORM_FRAME_YUV:
				//TODO: more to new _THREADED model
				TransformInverseFrameSectionToYUV(decoder, thread_index, frame_index, num_channels,
					output, pitch, &info, chroma_offset, precision);
				break;
			case THREAD_TRANSFORM_FRAME_ROW16U:
				//TODO: more to new _THREADED model
				TransformInverseFrameSectionToRow16u(decoder, thread_index, frame_index, num_channels,
					(PIXEL16U *)output, pitch, &info, chroma_offset, precision);
				break;
			default:
				assert(0);
				break;
			}
			// Signal that this thread is done
			SetEvent(decoder->interlaced_worker.done_event[thread_index]);
		}
		else
		{
			// Should have a condition that causes the thread to terminate
			assert(dwReturnValue == WAIT_OBJECT_0+1 || dwReturnValue == WAIT_ABANDONED);
			break;
		}
	}
	return 0;
}
| #endif | |
| void GetDecodedFrameDimensions(TRANSFORM **transform_array, | |
| int num_channels, | |
| int frame_index, | |
| int resolution, | |
| int *decoded_width_out, | |
| int *decoded_height_out) | |
| { | |
| IMAGE *wavelet = NULL; | |
| int decoded_scale = 0; | |
| int wavelet_width; | |
| int wavelet_height; | |
| int decoded_width; | |
| int decoded_height; | |
| // Get the decoding scale | |
| switch(resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL_DEBAYER: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| #if DEBUG | |
| assert(AllTransformBandsValid(transform_array, num_channels, frame_index)); | |
| #endif | |
| decoded_scale = 2; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| break; | |
| case DECODED_RESOLUTION_FULL: | |
| #if DEBUG | |
| assert(AllTransformBandsValid(transform_array, num_channels, frame_index)); | |
| #endif | |
| decoded_scale = 2; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| break; | |
| case DECODED_RESOLUTION_HALF_NODEBAYER: | |
| case DECODED_RESOLUTION_HALF: | |
| #if DEBUG | |
| assert(AllLowpassBandsValid(transform_array, num_channels, frame_index)); | |
| #endif | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[0]; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[3]; | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| decoded_scale = 1; | |
| wavelet = transform_array[0]->wavelet[5]; | |
| // Is this an intra frame? | |
| if (wavelet == NULL) { | |
| wavelet = transform_array[0]->wavelet[2]; | |
| } | |
| break; | |
| default: | |
| assert(0); | |
| break; | |
| } | |
| // Compute the decoded frame dimensions | |
| assert(wavelet != NULL); | |
| wavelet_width = wavelet->width; | |
| wavelet_height = wavelet->height; | |
| decoded_width = decoded_scale * wavelet_width; | |
| decoded_height = decoded_scale * wavelet_height; | |
| if (decoded_width_out) { | |
| *decoded_width_out = decoded_width; | |
| } | |
| if (decoded_height_out) { | |
| *decoded_height_out = decoded_height; | |
| } | |
| } | |
// Reconstruct Bayer format to the requested output format
//
// For the BYR2/BYR3/BYR4 output formats the uncompressed chunk is converted
// directly into the output buffer.  For the other supported formats the
// packed data is first expanded into the RawBayer16 scratch buffer and then
// demosaiced to the output by the worker thread pool.  Formats not listed in
// the switch return CODEC_ERROR_UNSUPPORTED_FORMAT.
// NOTE(review): on every successful path the uncompressed_chunk pointer is
// cleared without being freed — presumably it aliases memory owned elsewhere;
// confirm the chunk's ownership before changing this.
CODEC_ERROR UncompressedSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	//CODEC_STATE *codec = &decoder->codec;
	//int num_channels = codec->num_channels;
	//int precision = codec->precision;
	int format = info->format;
	int width = info->width;
	int height = info->height;
	//int resolution = info->resolution;
	// Compute the number of bytes between each row of Bayer data
	//int bayer_pitch = 2 * width * sizeof(PIXEL16U);
	// Compute the pitch between pairs of rows of bayer data (one pair per image row)
	//int raw_bayer_pitch = 2 * bayer_pitch;
	//int chroma_offset = decoder->codec.chroma_offset;

	// Assume the format is unsupported until proven otherwise below
	error = CODEC_ERROR_UNSUPPORTED_FORMAT;
	switch (format)
	{
	// Formats handled by the demosaic path further down
	case DECODED_FORMAT_RGB24:
	case DECODED_FORMAT_RGB32:
	case DECODED_FORMAT_RG48:	//DAN20090120 added not sure why they weren't here.
	case DECODED_FORMAT_RG64:	//DAN20101207 added not sure why they weren't here.
	case DECODED_FORMAT_WP13:	//DAN20090120 ""
	case DECODED_FORMAT_W13A:	//DAN20101207 ""
	case DECODED_FORMAT_B64A:
	case DECODED_FORMAT_R210:
	case DECODED_FORMAT_DPX0:
	case DECODED_FORMAT_RG30:
	case DECODED_FORMAT_AR10:
	case DECODED_FORMAT_AB10:
	case DECODED_FORMAT_YR16:
	case DECODED_FORMAT_V210:
	case DECODED_FORMAT_YU64:
	case DECODED_FORMAT_YUYV:	//?
	case DECODED_FORMAT_UYVY:	//?
	case DECODED_FORMAT_R408:
	case DECODED_FORMAT_V408:
		error = CODEC_ERROR_OKAY;
		break;

	// Bayer output formats: convert the packed chunk straight to the output
	case DECODED_FORMAT_BYR2:
	case DECODED_FORMAT_BYR4:
		{
			//bool linearRestore = false;
			unsigned short *curve = NULL;
			// Only apply the linear-restore curve for BYR4 output with no encode curve preset
			if(decoder->BYR4LinearRestore && decoder->frame.format == DECODED_FORMAT_BYR4 && decoder->cfhddata.encode_curve_preset == 0)
			{
				curve = decoder->BYR4LinearRestore;
			}
			ConvertPackedToBYR2(width, height, decoder->uncompressed_chunk, decoder->uncompressed_size, output_buffer, output_pitch, curve);
		}
		// Release the reference to the uncompressed chunk (not freed here)
		decoder->uncompressed_chunk = 0;
		decoder->uncompressed_size = 0;
		return CODEC_ERROR_OKAY;
		break;	// NOTE(review): unreachable after the return above

	case DECODED_FORMAT_BYR3:
		ConvertPackedToBYR3(width, height, decoder->uncompressed_chunk, decoder->uncompressed_size, output_buffer, output_pitch);
		decoder->uncompressed_chunk = 0;
		decoder->uncompressed_size = 0;
		return CODEC_ERROR_OKAY;
		break;	// NOTE(review): unreachable after the return above
	}
	if(error)
		return error;

	//int row;
	//int column;

	// Need to allocate a scratch buffer for decoding the Bayer frame?
	if (decoder->RawBayer16 == NULL)
	{
		// Four Bayer data samples at each 2x2 quad in the grid
		int pixel_size = 4 * sizeof(PIXEL16U);
		int frame_size;
		const size_t alignment = 16;
#if _ALLOCATOR
		ALLOCATOR *allocator = decoder->allocator;
#endif
		frame_size = width * height * pixel_size;
#if _ALLOCATOR
		decoder->RawBayer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)frame_size, alignment);
#else
		decoder->RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, alignment);
#endif
		assert(decoder->RawBayer16 != NULL);
		if (! (decoder->RawBayer16 != NULL)) {
			return CODEC_ERROR_MEMORY_ALLOC;
		}
		decoder->RawBayerSize = frame_size;

		// Also allocate the RGB filter buffer used by the demosaic stage
		if(decoder->RGBFilterBuffer16 == NULL)
		{
			// Three planes normally; four when RGBA input feeds an alpha-capable output
			int size = frame_size*3;
			if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format))
				size = frame_size*4;
#if _ALLOCATOR
			decoder->RGBFilterBuffer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)size, 16);
#else
			decoder->RGBFilterBuffer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(size, 16);
#endif
			assert(decoder->RGBFilterBuffer16 != NULL);
			if (! (decoder->RGBFilterBuffer16 != NULL)) {
				return CODEC_ERROR_MEMORY_ALLOC;
			}
			// NOTE(review): recorded size is frame_size*3 even when frame_size*4
			// bytes were allocated above — confirm whether this is intentional
			decoder->RGBFilterBufferSize = frame_size*3;
		}
	}

	// Using the RGBFilterBuffer16 as scratch space
	ConvertPackedToRawBayer16(width, height, decoder->uncompressed_chunk, decoder->uncompressed_size, decoder->RawBayer16, decoder->RGBFilterBuffer16, info->resolution);
	decoder->uncompressed_chunk = 0;
	decoder->uncompressed_size = 0;

#if _THREADED
	//DemosaicRAW
	// Hand the demosaic/output work to the worker thread pool, one job per row
	{
		WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;
		int inverted = false;
		uint8_t *output = output_buffer;
		int pitch = output_pitch;
#if _DELAY_THREAD_START
		// Create the pool on first use
		if(decoder->worker_thread.pool.thread_count == 0)
		{
			CreateLock(&decoder->worker_thread.lock);
			// Initialize the pool of transform worker threads
			ThreadPoolCreate(&decoder->worker_thread.pool,
							decoder->thread_cntrl.capabilities >> 16/*cpus*/,
							WorkerThreadProc,
							decoder);
		}
#endif
		// RGB24/RGB32 are bottom-up formats; switch to the inverted variants
		if (format == DECODED_FORMAT_RGB24)
		{
			format = DECODED_FORMAT_RGB24_INVERTED;
			inverted = true;
		}
		else if (format == DECODED_FORMAT_RGB32)
		{
			format = DECODED_FORMAT_RGB32_INVERTED;
			inverted = true;
		}
		// Have the output location and pitch been inverted?
		if (inverted && pitch > 0) {
			int height = info->height;
			// Debayered resolutions produce twice the transform height
			if(info->resolution == DECODED_RESOLUTION_FULL_DEBAYER || info->resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER)
				height *= 2;
			output += (height - 1) * pitch;		// Start at the bottom row
			pitch = NEG(pitch);					// Negate the pitch to go up
		}
		// Post a message to the mailbox
		mailbox->output = output;
		mailbox->pitch = pitch;
		memcpy(&mailbox->info, info, sizeof(FRAME_INFO));
		mailbox->jobType = JOB_TYPE_OUTPUT;
		// Set the work count to the number of rows to process
		ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height);
		// Start the transform worker threads
		ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);
		// Wait for all of the worker threads to finish
		ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
	}
#else
	// No single-threaded fallback for the demosaic path
	error = CODEC_ERROR_UNSUPPORTED_FORMAT;
#endif
	return error;
}
// Reconstruct uncompressed v210 YUV format to the requested output format
//
// Fast paths copy or repack the uncompressed chunk directly into the output
// buffer when the output is full-resolution V210/YUYV/UYVY and the active
// metadata decoder is not in use.  Otherwise the chunk is expanded through a
// scanline scratch buffer (RawBayer16 reused as scratch) and pushed through
// the active-metadata/output conversion, threaded when _THREADED is set.
// NOTE(review): the non-threaded #else branch near the bottom references
// `src` and `dst` whose declarations are commented out — that path does not
// compile as written; confirm whether it is dead code.
CODEC_ERROR UncompressedSampleFrameYUVToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch)
{
	CODEC_ERROR error = CODEC_ERROR_OKAY;
#if (1 && DEBUG)
	FILE *logfile = decoder->logfile;
#endif
	//CODEC_STATE *codec = &decoder->codec;
	//int num_channels = codec->num_channels;
	//int precision = codec->precision;
	int format = info->format;
	int width = info->width;
	int height = info->height;
	int resolution = info->resolution;
	// Compute the number of bytes between each row of Bayer data
	//int bayer_pitch = 2 * width * sizeof(PIXEL16U);
	// Compute the pitch between pairs of rows of bayer data (one pair per image row)
	//int raw_bayer_pitch = 2 * bayer_pitch;
	//int chroma_offset = decoder->codec.chroma_offset;

	error = CODEC_ERROR_UNSUPPORTED_FORMAT;

	// Fast path: v210 in, v210 out at full resolution — straight row copy
	if(format == DECODED_FORMAT_V210 && resolution == DECODED_RESOLUTION_FULL && decoder->use_active_metadata_decoder == false)
	{
		int smallest_Stride = output_pitch;
		// NOTE(review): assumes uncompressed_size is an exact multiple of height
		int unc_Stride = decoder->uncompressed_size / height;
		if(unc_Stride < smallest_Stride)
			smallest_Stride = unc_Stride;
		if(unc_Stride == output_pitch)
			memcpy(output_buffer, decoder->uncompressed_chunk, decoder->uncompressed_size);
		else
		{
			// Strides differ: copy row by row, bounded by the smaller stride
			int y;
			uint8_t *src = (uint8_t *)decoder->uncompressed_chunk;
			uint8_t *dst = (uint8_t *)output_buffer;
			for(y=0; y<height; y++)
			{
				memcpy(dst, src, smallest_Stride);
				src += unc_Stride;
				dst += output_pitch;
			}
		}
		// Release the reference to the uncompressed chunk (not freed here)
		decoder->uncompressed_chunk = 0;
		decoder->uncompressed_size = 0;
		return CODEC_ERROR_OKAY;
	}

	// Fast path: unpack v210 rows to 8-bit YUYV (and byte-swap for UYVY)
	if((format == DECODED_FORMAT_YUYV || format == DECODED_FORMAT_UYVY) && resolution == DECODED_RESOLUTION_FULL && decoder->use_active_metadata_decoder == false)
	{
		int smallest_Stride = output_pitch;
		int unc_Stride = decoder->uncompressed_size / height;
		if(unc_Stride < smallest_Stride)
			smallest_Stride = unc_Stride;
		{
			int y;
			uint8_t *src = (uint8_t *)decoder->uncompressed_chunk;
			uint8_t *dst = (uint8_t *)output_buffer;
			for(y=0; y<height; y++)
			{
				uint32_t *input_ptr = (uint32_t *)src;
				int pos = 0;
				int column=0,length = width;
				length -= length % 6;	//DAN03252004 -- fix a memory overflow.
				// v210 packs 6 pixels into four 32-bit words, three 10-bit
				// components per word; emit them as 8-bit YUYV pairs
				for (column=0; column < length; column += 6)
				{
					uint32_t yuv;
					int y;	// NOTE(review): shadows the row counter above
					int u;
					int v;
					// Read the first word
					yuv = *(input_ptr++);
					u = (yuv >> V210_VALUE1_SHIFT) & V210_VALUE_MASK;
					y = (yuv >> V210_VALUE2_SHIFT) & V210_VALUE_MASK;
					v = (yuv >> V210_VALUE3_SHIFT) & V210_VALUE_MASK;
					// Expand the pixels to sixteen bits
					u <<= 6;
					y <<= 6;
					v <<= 6;
					// Keep only the high byte of each 16-bit component
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(u)>>8;
					// Read the second word
					yuv = *(input_ptr++);
					y = (yuv >> V210_VALUE1_SHIFT) & V210_VALUE_MASK;
					y <<= 6;
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(v)>>8;
					u = (yuv >> V210_VALUE2_SHIFT) & V210_VALUE_MASK;
					y = (yuv >> V210_VALUE3_SHIFT) & V210_VALUE_MASK;
					u <<= 6;
					y <<= 6;
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(u)>>8;
					// Read the third word
					yuv = *(input_ptr++);
					v = (yuv >> V210_VALUE1_SHIFT) & V210_VALUE_MASK;
					y = (yuv >> V210_VALUE2_SHIFT) & V210_VALUE_MASK;
					v <<= 6;
					y <<= 6;
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(v)>>8;
					u = (yuv >> V210_VALUE3_SHIFT) & V210_VALUE_MASK;
					u <<= 6;
					// Read the fourth word
					yuv = *(input_ptr++);
					y = (yuv >> V210_VALUE1_SHIFT) & V210_VALUE_MASK;
					y <<= 6;
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(u)>>8;
					v = (yuv >> V210_VALUE2_SHIFT) & V210_VALUE_MASK;
					y = (yuv >> V210_VALUE3_SHIFT) & V210_VALUE_MASK;
					v <<= 6;
					y <<= 6;
					dst[pos++] = SATURATE_16U(y)>>8;
					dst[pos++] = SATURATE_16U(v)>>8;
				}
				// UYVY is YUYV with each byte pair swapped
				if(format == DECODED_FORMAT_UYVY)
				{
					for (column=0; column < pos; column += 2)
					{
						int t = dst[column];
						dst[column] = dst[column+1];
						dst[column+1] = t;
					}
				}
				src += unc_Stride;
				dst += output_pitch;
			}
		}
		decoder->uncompressed_chunk = 0;
		decoder->uncompressed_size = 0;
		return CODEC_ERROR_OKAY;
	}

	{
		// Expand YUV at the target resolution, and use the ActiveMetadata engine.
		// Need to allocate a scratch buffer for decoding the frame?
		if (decoder->RawBayer16 == NULL || decoder->RawBayerSize < width * 64)	//RawBayer used as a scratch buffer
		{
			//int pixel_size = 2 * sizeof(PIXEL16U);
			const size_t alignment = 16;
#if _ALLOCATOR
			ALLOCATOR *allocator = decoder->allocator;
#endif
			// Scratch is sized from the source (original) width, which is
			// larger than the output width at reduced resolutions
			int orig_width = width;
			if(resolution == DECODED_RESOLUTION_HALF)
				orig_width *= 2;
			if(resolution == DECODED_RESOLUTION_QUARTER)
				orig_width *= 4;
			// Free any existing (too small) scratch buffer first
			if(decoder->RawBayer16)
			{
#if _ALLOCATOR
				FreeAligned(allocator, decoder->RawBayer16);
				decoder->RawBayer16 = NULL;
				decoder->RawBayerSize = 0;
#else
				MEMORY_ALIGNED_FREE(decoder->RawBayer16);
				decoder->RawBayer16 = NULL;
				decoder->RawBayerSize = 0;
#endif
			}
#if _ALLOCATOR
			decoder->RawBayer16 = (PIXEL16U *)AllocAligned(allocator, orig_width * 64, alignment);
#else
			decoder->RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(orig_width * 64, alignment);
#endif
			assert(decoder->RawBayer16 != NULL);
			if (! (decoder->RawBayer16 != NULL)) {
				return CODEC_ERROR_MEMORY_ALLOC;
			}
			decoder->RawBayerSize = orig_width * 64;
		}
	}

	// unpack source original YUV into YU64?
	if(decoder->RawBayer16)
	{
		//uint8_t *src = (uint8_t *)decoder->uncompressed_chunk;
		//uint8_t *dst = (uint8_t *)output_buffer;
#if _THREADED
		// Threaded path: one JOB_TYPE_OUTPUT_UNCOMPRESSED job per output row
		{
			WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data;
#if _DELAY_THREAD_START
			// Create the worker pool on first use
			if(decoder->worker_thread.pool.thread_count == 0)
			{
				CreateLock(&decoder->worker_thread.lock);
				// Initialize the pool of transform worker threads
				ThreadPoolCreate(&decoder->worker_thread.pool,
								decoder->thread_cntrl.capabilities >> 16/*cpus*/,
								WorkerThreadProc,
								decoder);
			}
#endif
			// Post a message to the mailbox
			mailbox->output = output_buffer;
			mailbox->pitch = output_pitch;
			memcpy(&mailbox->info, info, sizeof(FRAME_INFO));
			mailbox->jobType = JOB_TYPE_OUTPUT_UNCOMPRESSED;
			// Set the work count to the number of rows to process
			ThreadPoolSetWorkCount(&decoder->worker_thread.pool, height);
			// Start the transform worker threads
			ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START);
			// Wait for all of the worker threads to finish
			ThreadPoolWaitAllDone(&decoder->worker_thread.pool);
		}
#else
		// Single-threaded fallback: unpack, downsample, apply active metadata
		// and convert, one output row at a time
		// NOTE(review): this branch uses `src` and `dst` whose declarations are
		// commented out above — it will not compile if _THREADED is disabled
		{
			int orig_width = width;
			int orig_height = height;
			int row,lines = 1;
			int start,end;
			if(resolution == DECODED_RESOLUTION_HALF)
			{
				orig_width *= 2;
				orig_height *= 2;
				lines = 2;
			}
			if(resolution == DECODED_RESOLUTION_QUARTER)
			{
				orig_width *= 4;
				orig_height *= 4;
				lines = 4;
			}
			start = 0;
			end = height;
			// Bottom-up formats iterate rows in reverse
			if(format == DECODED_FORMAT_RGB32 || format == DECODED_FORMAT_RGB24)
			{
				start = height-1;
				end = -1;
			}
			for (row = start; row != end; end > start ? row++ : row--)
			{
				int whitebitdepth = 16;
				int flags = 0;
				uint8_t *planar_output[3];
				int planar_pitch[3];
				ROI roi;
				PIXEL16U *y_row_ptr;
				PIXEL16U *u_row_ptr;
				PIXEL16U *v_row_ptr;
				PIXEL16U *scanline = (PIXEL16U *)decoder->RawBayer16;
				// Second scanline area further into the same scratch buffer
				PIXEL16U *scanline2 = scanline + orig_width * 8;
				unsigned short *sptr;
				int i,unc_Stride = decoder->uncompressed_size / orig_height;
				y_row_ptr = (PIXEL16U *)scanline;
				u_row_ptr = y_row_ptr + orig_width;
				v_row_ptr = u_row_ptr + orig_width/2;
				// Unpack `lines` source rows (1, 2 or 4 per output row)
				for(i=0; i<lines; i++)
				{
					src = (uint8_t *)decoder->uncompressed_chunk;
					src += row * unc_Stride;
					// Repack the row of 10-bit pixels into 16-bit pixels
					ConvertV210RowToYUV16((uint8_t *)src, y_row_ptr, u_row_ptr, v_row_ptr, orig_width, scanline2);
					// Advance to the next rows in the input and output images
					y_row_ptr += orig_width*2;
					u_row_ptr = y_row_ptr + orig_width;
					v_row_ptr = u_row_ptr + orig_width/2;
				}
				y_row_ptr = (PIXEL16U *)scanline;
				u_row_ptr = y_row_ptr + width;
				v_row_ptr = u_row_ptr + width/2;
				// Box-filter the unpacked rows down to the output width
				if(lines == 2)
				{
					for(i=0; i<width*2;i++)
						y_row_ptr[i] = (y_row_ptr[i*2] + y_row_ptr[i*2+1] + y_row_ptr[orig_width*2+i*2] + y_row_ptr[orig_width*2+i*2+1]) >> 2;
				}
				else if(lines == 4)
				{
					for(i=0; i<width*2;i++)
						y_row_ptr[i] = (y_row_ptr[i*4] + y_row_ptr[i*4+2] + y_row_ptr[orig_width*2*2+i*4] + y_row_ptr[orig_width*2*2+i*4+2]) >> 2;
				}
				roi.width = width;
				roi.height = 1;
				planar_output[0] = (uint8_t *)y_row_ptr;
				planar_output[1] = (uint8_t *)v_row_ptr;
				planar_output[2] = (uint8_t *)u_row_ptr;
				planar_pitch[0] = 0;
				planar_pitch[1] = 0;
				planar_pitch[2] = 0;
				if(decoder->apply_color_active_metadata)
				{
					// Convert to 8-pixel planar RGB, then run the active metadata engine
					ConvertYUVRow16uToBGRA64(planar_output, planar_pitch, roi,
						(unsigned char *)scanline2, width, output_pitch,
						COLOR_FORMAT_RGB_8PIXEL_PLANAR, decoder->frame.colorspace, &whitebitdepth, &flags);
					sptr = scanline2;
					sptr = ApplyActiveMetaData(decoder, width, 1, row, scanline2, scanline,
						info->format, &whitebitdepth, &flags);
				}
				else
				{
					// No active metadata: convert directly to white-point-13 RGB
					ConvertYUVRow16uToBGRA64(planar_output, planar_pitch, roi,
						(unsigned char *)scanline2, width, output_pitch,
						COLOR_FORMAT_WP13, decoder->frame.colorspace, &whitebitdepth, &flags);
					sptr = scanline2;
				}
				// Emit the converted row in the requested output format
				ConvertLinesToOutput(decoder, width, 1, row, sptr,
					dst, output_pitch, format, whitebitdepth, flags);
				dst += output_pitch;
			}
		}
#endif
	}
	error = CODEC_ERROR_OKAY;
	return error;
}
| // Reconstruct uncompressed DPX0 RGB format to the requested output format | |
| CODEC_ERROR UncompressedSampleFrameRGBToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int num_channels = codec->num_channels; | |
| //int precision = codec->precision; | |
| int format = info->format; | |
| //int output_format = info->output_format; // used by image_dev_only decodes | |
| int width = info->width; | |
| int height = info->height; | |
| int resolution = info->resolution; | |
| //int chroma_offset = decoder->codec.chroma_offset; | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| if( (format == DECODED_FORMAT_DPX0 || format == DECODED_FORMAT_AR10 || format == DECODED_FORMAT_AB10 || format == DECODED_FORMAT_RG30 || format == DECODED_FORMAT_R210) && | |
| resolution == DECODED_RESOLUTION_FULL && decoder->use_active_metadata_decoder == false) | |
| { | |
| int smallest_Stride = output_pitch; | |
| int unc_Stride = decoder->uncompressed_size / height; | |
| if(unc_Stride < smallest_Stride) | |
| smallest_Stride = unc_Stride; | |
| if(format != DECODED_FORMAT_DPX0) | |
| { | |
| int unc_Stride = decoder->uncompressed_size / height; | |
| ConvertDPX0ToRGB10((uint8_t *)decoder->uncompressed_chunk, unc_Stride, width, height, format); | |
| } | |
| if(unc_Stride == output_pitch) | |
| memcpy(output_buffer, decoder->uncompressed_chunk, decoder->uncompressed_size); | |
| else | |
| { | |
| int y; | |
| uint8_t *src = (uint8_t *)decoder->uncompressed_chunk; | |
| uint8_t *dst = (uint8_t *)output_buffer; | |
| for(y=0; y<height; y++) | |
| { | |
| memcpy(dst, src, smallest_Stride); | |
| src += unc_Stride; | |
| dst += output_pitch; | |
| } | |
| } | |
| decoder->uncompressed_chunk = 0; | |
| decoder->uncompressed_size = 0; | |
| return CODEC_ERROR_OKAY; | |
| } | |
| { | |
| // Expand YUV at the target resolution, and use the ActiveMetadata engine. | |
| // Need to allocate a scratch buffer for decoding the frame? | |
| if (decoder->RawBayer16 == NULL || decoder->RawBayerSize < width * 64) //RawBayer used as a scratch buffer | |
| { | |
| //int pixel_size = 2 * sizeof(PIXEL16U); | |
| const size_t alignment = 16; | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| int orig_width = width; | |
| if(resolution == DECODED_RESOLUTION_HALF) | |
| orig_width *= 2; | |
| if(resolution == DECODED_RESOLUTION_QUARTER) | |
| orig_width *= 4; | |
| if(decoder->RawBayer16) | |
| { | |
| #if _ALLOCATOR | |
| FreeAligned(allocator, decoder->RawBayer16); | |
| decoder->RawBayer16 = NULL; | |
| decoder->RawBayerSize = 0; | |
| #else | |
| MEMORY_ALIGNED_FREE(decoder->RawBayer16); | |
| decoder->RawBayer16 = NULL; | |
| decoder->RawBayerSize = 0; | |
| #endif | |
| } | |
| #if _ALLOCATOR | |
| decoder->RawBayer16 = (PIXEL16U *)AllocAligned(allocator, orig_width * 64, alignment); | |
| #else | |
| decoder->RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(orig_width * 64, alignment); | |
| #endif | |
| assert(decoder->RawBayer16 != NULL); | |
| if (! (decoder->RawBayer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RawBayerSize = orig_width * 64; | |
| } | |
| } | |
| // unpack source original YUV into YU64? | |
| if(decoder->RawBayer16) | |
| { | |
| //uint8_t *src = (uint8_t *)decoder->uncompressed_chunk; | |
| //uint8_t *dst = (uint8_t *)output_buffer; | |
| #if _THREADED | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output_buffer; | |
| mailbox->pitch = output_pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT_UNCOMPRESSED; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| #else | |
| { | |
| int orig_width = width; | |
| int orig_height = height; | |
| int row,lines = 1; | |
| int start,end; | |
| if(resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| orig_width *= 2; | |
| orig_height *= 2; | |
| lines = 2; | |
| } | |
| if(resolution == DECODED_RESOLUTION_QUARTER) | |
| { | |
| orig_width *= 4; | |
| orig_height *= 4; | |
| lines = 4; | |
| } | |
| start = 0; | |
| end = height; | |
| if(format == DECODED_FORMAT_RGB32 || format == DECODED_FORMAT_RGB24) // Can this work, all the code below expects 10-bit | |
| { | |
| start = height-1; | |
| end = -1; | |
| } | |
| for (row = start; row != end; end > start ? row++ : row--) | |
| { | |
| int whitebitdepth = 16; | |
| int flags = 0; | |
| uint8_t *planar_output[3]; | |
| int planar_pitch[3]; | |
| ROI roi; | |
| PIXEL16U *y_row_ptr; | |
| PIXEL16U *u_row_ptr; | |
| PIXEL16U *v_row_ptr; | |
| PIXEL16U *scanline = (PIXEL16U *)decoder->RawBayer16; | |
| PIXEL16U *scanline2 = scanline + orig_width * 8; | |
| unsigned short *sptr; | |
| int i,unc_Stride = decoder->uncompressed_size / orig_height; | |
| whitebitdepth = 13; | |
| if(decoder->apply_color_active_metadata) | |
| flags = ACTIVEMETADATA_SRC_8PIXEL_PLANAR; | |
| else | |
| flags = 0; | |
| roi.width = width; | |
| roi.height = 1; | |
| if(lines == 1) | |
| { | |
| uint16_t *sptr; | |
| uint32_t j,*lptr = (uint32_t *)decoder->uncompressed_chunk; | |
| PIXEL16U *ptr = (PIXEL16U *)scanline; | |
| lptr += row * (unc_Stride>>2); | |
| sptr = (uint16_t *)lptr; | |
| for(i=0; i<width;i+=8) | |
| { | |
| int val,r,g,b; | |
| if(flags == ACTIVEMETADATA_SRC_8PIXEL_PLANAR) | |
| { | |
| if(decoder->image_dev_only) // HACK, currently assuming RG48 input data. | |
| { | |
| for(j=0; j<8; j++) | |
| { | |
| ptr[j] = sptr[0] >> 3; | |
| ptr[j+8] = sptr[1] >> 3; | |
| ptr[j+16] = sptr[2] >> 3; | |
| sptr += 3; | |
| } | |
| } | |
| else | |
| { | |
| for(j=0; j<8; j++) | |
| { | |
| val = SwapInt32(*lptr++); | |
| val >>= 2; | |
| b = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r = (val & 0x3ff) << 3; | |
| ptr[j] = r; | |
| ptr[j+8] = g; | |
| ptr[j+16] = b; | |
| } | |
| } | |
| } | |
| else | |
| { | |
| if(decoder->image_dev_only) // HACK, currently assuming RG48 input data. | |
| { | |
| for(j=0; j<8*3; j+=3) | |
| { | |
| ptr[j] = sptr[0] >> 3; | |
| ptr[j+1] = sptr[1] >> 3; | |
| ptr[j+2] = sptr[2] >> 3; | |
| sptr += 3; | |
| } | |
| } | |
| else | |
| { | |
| for(j=0; j<8*3; j+=3) | |
| { | |
| val = SwapInt32(*lptr++); | |
| val >>= 2; | |
| b = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r = (val & 0x3ff) << 3; | |
| ptr[j] = r; | |
| ptr[j+1] = g; | |
| ptr[j+2] = b; | |
| } | |
| } | |
| } | |
| ptr += 24; | |
| } | |
| } | |
| else if(lines == 2) | |
| { | |
| uint32_t j,*lptr = (uint32_t)decoder->uncompressed_chunk; | |
| PIXEL16U *ptr = (PIXEL16U *)scanline; | |
| lptr += row * (unc_Stride>>2) * lines; | |
| for(i=0; i<width;i+=8) | |
| { | |
| int val,r,g,b,r2,g2,b2,r3,g3,b3,r4,g4,b4; | |
| for(j=0; j<8; j++) | |
| { | |
| val = SwapInt32(lptr[0]); | |
| val >>= 2; | |
| b = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r = (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[1]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[unc_Stride>>2]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[(unc_Stride>>2)+1]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| if(flags == ACTIVEMETADATA_SRC_8PIXEL_PLANAR) | |
| { | |
| ptr[j] = r>>2; | |
| ptr[j+8] = g>>2; | |
| ptr[j+16] = b>>2; | |
| } | |
| else | |
| { | |
| ptr[j*3] = r>>2; | |
| ptr[j*3+1] = g>>2; | |
| ptr[j*3+2] = b>>2; | |
| } | |
| lptr += lines; | |
| } | |
| ptr += 24; | |
| } | |
| } | |
| else if(lines == 4) | |
| { | |
| uint32_t j,*lptr = (uint32_t)decoder->uncompressed_chunk; | |
| PIXEL16U *ptr = (PIXEL16U *)scanline; | |
| lptr += row * (unc_Stride>>2) * lines; | |
| for(i=0; i<width;i+=8) | |
| { | |
| int val,r,g,b,r2,g2,b2,r3,g3,b3,r4,g4,b4; | |
| for(j=0; j<8; j++) | |
| { | |
| val = SwapInt32(lptr[0]); | |
| val >>= 2; | |
| b = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g = (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r = (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[2]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[unc_Stride>>1]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| val = SwapInt32(lptr[(unc_Stride>>1)+2]); | |
| val >>= 2; | |
| b += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| g += (val & 0x3ff) << 3; | |
| val >>= 10; | |
| r += (val & 0x3ff) << 3; | |
| if(flags == ACTIVEMETADATA_SRC_8PIXEL_PLANAR) | |
| { | |
| ptr[j] = r>>2; | |
| ptr[j+8] = g>>2; | |
| ptr[j+16] = b>>2; | |
| } | |
| else | |
| { | |
| ptr[j*3] = r>>2; | |
| ptr[j*3+1] = g>>2; | |
| ptr[j*3+2] = b>>2; | |
| } | |
| lptr += lines; | |
| } | |
| ptr += 24; | |
| } | |
| } | |
| sptr = scanline; | |
| if(decoder->apply_color_active_metadata) | |
| sptr = ApplyActiveMetaData(decoder, width, 1, row, scanline, scanline2, | |
| info->format, &whitebitdepth, &flags); | |
| ConvertLinesToOutput(decoder, width, 1, row, sptr, | |
| dst, output_pitch, format, whitebitdepth, flags); | |
| dst += output_pitch; | |
| } | |
| } | |
| #endif | |
| } | |
| error = CODEC_ERROR_OKAY; | |
| return error; | |
| } | |
| // Reconstruct Bayer format to the requested output format | |
| CODEC_ERROR ReconstructSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output, int pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| //int precision = codec->precision; | |
| //TRANSFORM **transform_array = decoder->transform; | |
| int resolution = info->resolution; | |
| //int format = info->format; | |
| // Switch to the subroutine for the requested resolution | |
| switch (resolution) | |
| { | |
| case DECODED_RESOLUTION_FULL_DEBAYER: | |
| case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| //error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| return ReconstructSampleFrameDeBayerFullToBuffer(decoder, info, frame, output, pitch); | |
| break; | |
| case DECODED_RESOLUTION_FULL: | |
| //return ReconstructSampleFrameBayerFullToBuffer(decoder, info, frame, output, pitch); | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| //case DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER: | |
| case DECODED_RESOLUTION_HALF_NODEBAYER: | |
| case DECODED_RESOLUTION_HALF: | |
| //return ReconstructSampleFrameBayerHalfToBuffer(decoder, info, frame, output, pitch); | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| case DECODED_RESOLUTION_QUARTER: | |
| //return ReconstructSampleFrameBayerQuarterToBuffer(decoder, frame, output, pitch); | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| case DECODED_RESOLUTION_LOWPASS_ONLY: | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| default: | |
| // The decoded resolution is not supported by this routine | |
| assert(0); | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| } | |
| return error; | |
| } | |
| // Reconstruct Bayer encoded data to full resolution | |
| CODEC_ERROR ReconstructSampleFrameBayerFullToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| //int precision = codec->precision; | |
| //TRANSFORM **transform_array = decoder->transform; | |
| //int decoded_width = 0; | |
| //int decoded_height = 0; | |
| //int resolution = info->resolution; | |
| int format = info->format; | |
| //int width = info->width; | |
| //int height = info->height; | |
| // Compute the number of bytes between each row of Bayer data | |
| //int bayer_pitch = 2 * width * sizeof(PIXEL16U); | |
| // Compute the pitch between pairs of rows of bayer data (one pair per image row) | |
| //int raw_bayer_pitch = 2 * bayer_pitch; | |
| //int chroma_offset = decoder->codec.chroma_offset; | |
| //int row; | |
| //int column; | |
| // Need to allocate a scratch buffer for decoding the Bayer frame? | |
| if (decoder->RawBayer16 == NULL) | |
| { | |
| TRANSFORM **transform_array = decoder->transform; | |
| int decoded_width = 0; | |
| int decoded_height = 0; | |
| int resolution = info->resolution; | |
| //int format = info->format; | |
| // Four Bayer data samples at each 2x2 quad in the grid | |
| int pixel_size = 4 * sizeof(PIXEL16U); | |
| int frame_size; | |
| const size_t alignment = 16; | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| // Compute the decoded width and height for the specified resolution | |
| GetDecodedFrameDimensions(transform_array, num_channels, frame, resolution, &decoded_width, &decoded_height); | |
| assert(decoded_width > 0 && decoded_height > 0); | |
| if (! (decoded_width > 0 && decoded_height > 0)) { | |
| return CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| } | |
| frame_size = decoded_width * decoded_height * pixel_size; | |
| #if _ALLOCATOR | |
| decoder->RawBayer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)frame_size, alignment); | |
| #else | |
| decoder->RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, alignment); | |
| #endif | |
| assert(decoder->RawBayer16 != NULL); | |
| if (! (decoder->RawBayer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RawBayerSize = frame_size; | |
| //#ifdef SHARPENING | |
| if(decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| int size = frame_size*3; | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| size = frame_size*4; | |
| #if _ALLOCATOR | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)size, 16); | |
| #else | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(size, 16); | |
| #endif | |
| assert(decoder->RGBFilterBuffer16 != NULL); | |
| if (! (decoder->RGBFilterBuffer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RGBFilterBufferSize = frame_size*3; | |
| } | |
| //#endif | |
| } | |
| //TODO: Need to add more output formats to this routine | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| // Decode the last transform to rows of Bayer data (one row per channel) | |
| // TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| // decoder->RawBayer16, raw_bayer_pitch, info, | |
| // &decoder->scratch, chroma_offset, precision); | |
| // ConvertPackedBayerToRGB32(decoder->RawBayer16, info, bayer_pitch, | |
| // output_buffer, output_pitch, | |
| // width, height); | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| // Decode the last transform to rows of Bayer data (one row per channel) | |
| //TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| // decoder->RawBayer16, raw_bayer_pitch, info, | |
| // &decoder->scratch, chroma_offset, precision); | |
| //ConvertPackedBayerToRGB24(decoder->RawBayer16, info, bayer_pitch, | |
| // output_buffer, output_pitch, | |
| // width, height); | |
| break; | |
| default: | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| } | |
| return error; | |
| } | |
| // Reconstruct Bayer encoded data and demosaic to full resolution | |
| CODEC_ERROR ReconstructSampleFrameDeBayerFullToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| int precision = codec->precision; | |
| //TRANSFORM **transform_array = decoder->transform; | |
| //int decoded_width = 0; | |
| //int decoded_height = 0; | |
| //int resolution = info->resolution; | |
| int format = info->format; | |
| int width = info->width; | |
| //int height = info->height; | |
| // Compute the number of bytes between each row of Bayer data | |
| int bayer_pitch = 2 * width * sizeof(PIXEL16U); | |
| // Compute the pitch between pairs of rows of bayer data (one pair per image row) | |
| //int raw_bayer_pitch = 2 * bayer_pitch; | |
| int chroma_offset = decoder->codec.chroma_offset; | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RG48: //DAN20090120 added not sure why they weren't here. | |
| case DECODED_FORMAT_WP13: //DAN20090120 "" | |
| case DECODED_FORMAT_B64A: | |
| case DECODED_FORMAT_R210: | |
| case DECODED_FORMAT_DPX0: | |
| case DECODED_FORMAT_RG30: | |
| case DECODED_FORMAT_AR10: | |
| case DECODED_FORMAT_AB10: | |
| case DECODED_FORMAT_YR16: | |
| case DECODED_FORMAT_V210: | |
| case DECODED_FORMAT_YU64: | |
| error = CODEC_ERROR_OKAY; | |
| break; | |
| } | |
| if(error) | |
| return error; | |
| //int row; | |
| //int column; | |
| // Need to allocate a scratch buffer for decoding the Bayer frame? | |
| if (decoder->RawBayer16 == NULL) | |
| { | |
| TRANSFORM **transform_array = decoder->transform; | |
| int decoded_width = 0; | |
| int decoded_height = 0; | |
| int resolution = info->resolution; | |
| //int format = info->format; | |
| // Four Bayer data samples at each 2x2 quad in the grid | |
| int pixel_size = 4 * sizeof(PIXEL16U); | |
| int frame_size; | |
| const size_t alignment = 16; | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| // Compute the decoded width and height for the specified resolution | |
| GetDecodedFrameDimensions(transform_array, num_channels, frame, resolution, &decoded_width, &decoded_height); | |
| assert(decoded_width > 0 && decoded_height > 0); | |
| if (! (decoded_width > 0 && decoded_height > 0)) { | |
| return CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| } | |
| frame_size = decoded_width * decoded_height * pixel_size; | |
| #if _ALLOCATOR | |
| decoder->RawBayer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)frame_size, alignment); | |
| #else | |
| decoder->RawBayer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, alignment); | |
| #endif | |
| assert(decoder->RawBayer16 != NULL); | |
| if (! (decoder->RawBayer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RawBayerSize = frame_size; | |
| //#ifdef SHARPENING | |
| if(decoder->RGBFilterBuffer16 == NULL) | |
| { | |
| int size = frame_size*3; | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| size = frame_size*4; | |
| #if _ALLOCATOR | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)AllocAligned(allocator, (size_t)size, 16); | |
| #else | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(size, 16); | |
| #endif | |
| assert(decoder->RGBFilterBuffer16 != NULL); | |
| if (! (decoder->RGBFilterBuffer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RGBFilterBufferSize = frame_size*3; | |
| } | |
| //#endif | |
| } | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, num_channels, | |
| (uint8_t *)decoder->RawBayer16, bayer_pitch*sizeof(PIXEL), | |
| info, chroma_offset, precision); | |
| //DemosaicRAW | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| int inverted = false; | |
| uint8_t *output = output_buffer; | |
| int pitch = output_pitch; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| if (format == DECODED_FORMAT_RGB24) | |
| { | |
| format = DECODED_FORMAT_RGB24_INVERTED; | |
| inverted = true; | |
| } | |
| else if (format == DECODED_FORMAT_RGB32) | |
| { | |
| format = DECODED_FORMAT_RGB32_INVERTED; | |
| inverted = true; | |
| } | |
| // Have the output location and pitch been inverted? | |
| if (inverted && pitch > 0) { | |
| int height = info->height; | |
| if(info->resolution == DECODED_RESOLUTION_FULL_DEBAYER || info->resolution == DECODED_RESOLUTION_HALF_HORIZONTAL_DEBAYER) | |
| height *= 2; | |
| output += (height - 1) * pitch; // Start at the bottom row | |
| pitch = NEG(pitch); // Negate the pitch to go up | |
| } | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| #else | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| #endif | |
| return error; | |
| } | |
| // Reconstruct Bayer encoded data to half resolution | |
| CODEC_ERROR ReconstructSampleFrameBayerHalfToBuffer(DECODER *decoder, FRAME_INFO *info, int frame, uint8_t *output_buffer, int output_pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| //int precision = codec->precision; | |
| TRANSFORM **transform_array = decoder->transform; | |
| int frame_width = info->width; | |
| int frame_height = info->height; | |
| //int resolution = info->resolution; | |
| int format = info->format; | |
| //IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| PIXEL16U *g1_plane; | |
| PIXEL16U *rg_plane; | |
| PIXEL16U *bg_plane; | |
| PIXEL16U *g2_plane; | |
| int g1_pitch; | |
| int rg_pitch; | |
| int bg_pitch; | |
| int g2_pitch; | |
| #if 0 | |
| int channel; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[frame]; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| char label[_MAX_PATH]; | |
| char *format = decoded_format_string[info->format]; | |
| sprintf(label, "Output, channel: %d, format: %s", channel, format); | |
| DumpImageStatistics(label, lowpass_images[channel], logfile); | |
| } | |
| #endif | |
| } | |
| #endif | |
| // Get the lowpass bands in the wavelet coresponding to the output frame | |
| g1_plane = (PIXEL16U *)transform_array[0]->wavelet[frame]->band[0]; | |
| rg_plane = (PIXEL16U *)transform_array[1]->wavelet[frame]->band[0]; | |
| bg_plane = (PIXEL16U *)transform_array[2]->wavelet[frame]->band[0]; | |
| if(transform_array[3]->wavelet[frame]) //half res don't decode g1-g2 //HACK | |
| { | |
| g2_plane = (PIXEL16U *)transform_array[3]->wavelet[frame]->band[0]; | |
| g2_pitch = transform_array[3]->wavelet[frame]->pitch; | |
| } | |
| else | |
| { | |
| g2_plane = NULL; | |
| g2_pitch = 0; | |
| } | |
| // Get the pitch of each plane | |
| g1_pitch = transform_array[0]->wavelet[frame]->pitch; | |
| rg_pitch = transform_array[1]->wavelet[frame]->pitch; | |
| bg_pitch = transform_array[2]->wavelet[frame]->pitch; | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| ConvertPlanarBayerToRGB32(g1_plane, g1_pitch, rg_plane, rg_pitch, | |
| bg_plane, bg_pitch, g2_plane, g2_pitch, | |
| output_buffer, output_pitch, | |
| frame_width, frame_height); | |
| break; | |
| default: | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| break; | |
| } | |
| return error; | |
| } | |
| // Reconstruct Bayer encoded data to quarter resolution | |
| CODEC_ERROR ReconstructSampleFrameBayerQuarterToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //FRAME_INFO *info = &decoder->frame; | |
| //CODEC_STATE *codec = &decoder->codec; | |
| //int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| //int precision = codec->precision; | |
| //TRANSFORM **transform_array = decoder->transform; | |
| //int decoded_width = 0; | |
| //int decoded_height = 0; | |
| //int resolution = info->resolution; | |
| //int format = info->format; | |
| //TODO: Need to finish this routine | |
| assert(0); | |
| return error; | |
| } | |
| // Reconstruct the original YUV 4:2:2 encoded format to the requested output format | |
| CODEC_ERROR ReconstructSampleFrameYUV422ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| FRAME_INFO *info = &decoder->frame; | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| int progressive = codec->progressive; | |
| int precision = codec->precision; | |
| TRANSFORM **transform_array = decoder->transform; | |
| //int decoded_width = 0; | |
| //int decoded_height = 0; | |
| int resolution = info->resolution; | |
| int format = info->format; | |
| //int color_space = decoder->frame.colorspace; | |
| //TODO: Eliminate use of the chroma offset | |
| int chroma_offset = decoder->codec.chroma_offset; | |
| #if _THREADED | |
| // Type of threaded inverse transform | |
| //int type; | |
| #endif | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| if (decoder == NULL) { | |
| return CODEC_ERROR_INVALID_ARGUMENT; | |
| } | |
| //TODO: Split this routine into subroutines for progressive versus interlaced video | |
| //TODO: Split progressive and interlaced routines into subroutines for each resolution | |
| if(resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| bool inverted = false; | |
| FRAME_INFO info2; | |
| memcpy(&info2, info, sizeof(FRAME_INFO)); | |
| format = info2.format; | |
| if (format == DECODED_FORMAT_RGB24) { | |
| format = DECODED_FORMAT_RGB24_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| else if (format == DECODED_FORMAT_RGB32) { | |
| format = DECODED_FORMAT_RGB32_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| #if 1 | |
| // Have the output location and pitch been inverted? | |
| if (inverted && pitch > 0) { | |
| int height = info->height; | |
| output += (height - 1) * pitch; // Start at the bottom row | |
| pitch = NEG(pitch); // Negate the pitch to go up | |
| } | |
| #endif | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| #if _THREADED | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| mailbox->framenum = frame; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| decoder->RGBFilterBufferPhase = 1; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| decoder->RGBFilterBufferPhase = 0; | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| else | |
| { | |
| int precision = codec->precision; | |
| TRANSFORM **transform_array = decoder->transform; | |
| int channel; | |
| IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[frame]; | |
| } | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, &info2, chroma_offset, | |
| precision, decoder->codec.encoded_format, decoder->frame.white_point); | |
| } | |
| return CODEC_ERROR_OKAY; | |
| } | |
| // Was the video source interlaced or progressive? | |
| if (progressive) | |
| { | |
| // The video source was progressive (the first transform was a spatial transform) | |
| if (resolution == DECODED_RESOLUTION_FULL || resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| { | |
| FRAME_INFO info2; | |
| int format; | |
| bool inverted = false; | |
| int precision = codec->precision; | |
| memcpy(&info2, info, sizeof(FRAME_INFO)); | |
| format = info2.format; | |
| if (format == DECODED_FORMAT_RGB24) { | |
| format = DECODED_FORMAT_RGB24_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| else if (format == DECODED_FORMAT_RGB32) { | |
| format = DECODED_FORMAT_RGB32_INVERTED; | |
| info2.format = format; | |
| inverted = true; | |
| } | |
| #if 1 | |
| // Have the output location and pitch been inverted? | |
| if (inverted && pitch > 0) { | |
| int height = info->height; | |
| output += (height - 1) * pitch; // Start at the bottom row | |
| pitch = NEG(pitch); // Negate the pitch to go up | |
| } | |
| #endif | |
| /*if(decoder->use_active_metadata_decoder) | |
| { | |
| switch (format & 0x7ffffff) | |
| { | |
| case DECODED_FORMAT_RGB24: // Output buffer is too small to decode into for | |
| case DECODED_FORMAT_YUYV: // computing the active metadata. | |
| case DECODED_FORMAT_UYVY: | |
| return CODEC_ERROR_OKAY; | |
| break; | |
| } | |
| }*/ | |
| switch (format & 0x7ffffff) | |
| { | |
| case DECODED_FORMAT_RGB24: // Output buffer is too small to decode into for | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| else | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sYUVtoRGB); | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| case DECODED_FORMAT_UYVY: | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| else | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sToYUV); | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| break; | |
| //Handle sizes that are smaller than the interim decode buffer //DAN20081222 | |
| case DECODED_FORMAT_CbYCrY_10bit_2_8: | |
| decoder->upper_plane = output; | |
| decoder->lower_plane = output + decoder->frame.width * decoder->frame.height / 2; | |
| // Use the address and pitch of the lower plane | |
| output = decoder->lower_plane; | |
| pitch = decoder->frame.width * 2; | |
| // Fall through and compute the inverse spatial transform | |
| case DECODED_FORMAT_CbYCrY_16bit_2_14: | |
| case DECODED_FORMAT_CbYCrY_16bit_10_6: | |
| case DECODED_FORMAT_CbYCrY_8bit: | |
| case DECODED_FORMAT_CbYCrY_16bit: | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| else | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sToOutput); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| break; | |
| case DECODED_FORMAT_V210: | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| else | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalYUVStrip16sToYUVOutput); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| break; | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB32_INVERTED: | |
| // As long as the outpitch is greater or equal to 4:2:2 16-bit YR16 this works. | |
| case DECODED_FORMAT_RG48: | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_R210: | |
| case DECODED_FORMAT_DPX0: | |
| case DECODED_FORMAT_RG30: | |
| case DECODED_FORMAT_AR10: | |
| case DECODED_FORMAT_AB10: | |
| case DECODED_FORMAT_B64A: | |
| case DECODED_FORMAT_R408: | |
| case DECODED_FORMAT_V408: | |
| case DECODED_FORMAT_YU64: | |
| case DECODED_FORMAT_YR16: | |
| case DECODED_FORMAT_WP13: | |
| case DECODED_FORMAT_W13A: | |
| if((format & 0x7FFFFFFF) == DECODED_FORMAT_RGB32 && decoder->use_active_metadata_decoder == false) | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialThreadedYUV422ToBuffer(decoder, | |
| frame, num_channels, output, pitch, | |
| &info2, chroma_offset, precision); | |
| #elif 0 | |
| TransformInverseSpatialToBuffer(decoder, transform_array, frame, | |
| num_channels, output, pitch, | |
| &info2, &decoder->scratch, chroma_offset, precision); | |
| #else | |
| TransformInverseSpatialYUV422ToOutput(decoder, transform_array, | |
| frame, num_channels, output, pitch, | |
| &info2, &decoder->scratch, chroma_offset, precision, | |
| InvertHorizontalStripYUV16sToPackedRGB32); | |
| #endif | |
| return CODEC_ERROR_OKAY; | |
| } | |
| #if _THREADED | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| else | |
| { | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, | |
| num_channels, output, pitch, | |
| &info2, chroma_offset, precision); | |
| ConvertRow16uToOutput(decoder, frame, num_channels, output, pitch, | |
| &info2, chroma_offset, precision); | |
| return CODEC_ERROR_OKAY; | |
| } | |
| #endif | |
| break; | |
| default: | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sThruActiveMetadata); | |
| return CODEC_ERROR_OKAY; | |
| #endif | |
| } | |
| // else Return the error code for unsupported output format | |
| break; | |
| } | |
| } | |
| } | |
| else | |
| { | |
| // The video source was interlaced (the first transform was a frame transform) | |
| if (resolution == DECODED_RESOLUTION_FULL || resolution == DECODED_RESOLUTION_HALF_HORIZONTAL) | |
| { | |
| bool inverted = false; | |
| if (format == DECODED_FORMAT_RGB32 || format == DECODED_FORMAT_RGB24) { | |
| // info->format = DECODED_FORMAT_RGB32_INVERTED; //DAN20080702 vertically flips QT decodes if active. | |
| inverted = true; | |
| } | |
| #if 1 | |
| // Have the output location and pitch been inverted? | |
| if (inverted && pitch > 0) { | |
| int height = info->height; | |
| output += (height - 1) * pitch; // Start at the bottom row | |
| pitch = NEG(pitch); // Negate the pitch to go up | |
| } | |
| #endif | |
| switch (format & 0x7ffffff) | |
| { | |
| case DECODED_FORMAT_NV12: | |
| case DECODED_FORMAT_RGB24: // Output buffer is too small to decode into for | |
| case DECODED_FORMAT_YUYV: | |
| case DECODED_FORMAT_UYVY: | |
| case DECODED_FORMAT_V210: // only supported with use_active_metadata_decoder | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| int frame_size = info->width * info->height * 4; | |
| if(decoder->RGBFilterBuffer16==NULL || decoder->RGBFilterBufferSize < frame_size) | |
| { | |
| #if _ALLOCATOR | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| } | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)AllocAligned(allocator, frame_size, 16); | |
| #else | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| } | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, 16); | |
| #endif | |
| assert(decoder->RGBFilterBuffer16 != NULL); | |
| if (! (decoder->RGBFilterBuffer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RGBFilterBufferSize = frame_size; | |
| } | |
| //TransformInverseSpatialUniversalThreadedToRow16u( | |
| // decoder, frame, num_channels, | |
| // (uint8_t *)decoder->RGBFilterBuffer16, info->width * 3 * 2, | |
| // info, chroma_offset, precision); | |
| #if _INTERLACED_WORKER_THREADS | |
| StartInterlaceWorkerThreads(decoder); | |
| //TODO: support new threading | |
| // Send the upper and lower rows of the transforms to the worker threads | |
| TransformInverseFrameThreadedToRow16u(decoder, frame, num_channels, | |
| (PIXEL16U *)decoder->RGBFilterBuffer16, | |
| info->width * 4, | |
| info, chroma_offset, precision); | |
| #else | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| TransformInverseFrameToRow16u(decoder, transform_array, frame, num_channels, | |
| (PIXEL16U *)decoder->RGBFilterBuffer16, | |
| info->width * 4, info, | |
| &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| #if _THREADED | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| decoder->RGBFilterBufferPhase = 2; // yuv | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| decoder->RGBFilterBufferPhase = 0; | |
| } | |
| #endif | |
| return CODEC_ERROR_OKAY; | |
| } | |
| } | |
| switch (format) | |
| { | |
| // As long as the outpitch is greater or equal to 4:2:2 16-bit YR16 this works. | |
| case DECODED_FORMAT_WP13: //DAN20110203 - missing | |
| case DECODED_FORMAT_W13A: //DAN20110203 - missing | |
| case DECODED_FORMAT_RG48: | |
| case DECODED_FORMAT_RG64: | |
| case DECODED_FORMAT_R210: | |
| case DECODED_FORMAT_DPX0: | |
| case DECODED_FORMAT_RG30: | |
| case DECODED_FORMAT_AR10: | |
| case DECODED_FORMAT_AB10: | |
| case DECODED_FORMAT_B64A: | |
| case DECODED_FORMAT_RGB32: //32-bit format can fit the interim YR16 decode into | |
| case DECODED_FORMAT_R408: //the output buffer | |
| case DECODED_FORMAT_V408: | |
| case DECODED_FORMAT_YU64: | |
| case DECODED_FORMAT_YR16: | |
| #if _INTERLACED_WORKER_THREADS | |
| StartInterlaceWorkerThreads(decoder); | |
| //TODO: support new threading | |
| // Send the upper and lower rows of the transforms to the worker threads | |
| TransformInverseFrameThreadedToRow16u(decoder, frame, num_channels, | |
| (PIXEL16U *)output, pitch, | |
| info, chroma_offset, precision); | |
| ConvertRow16uToOutput(decoder, frame, num_channels, output, pitch, | |
| info, chroma_offset, precision); | |
| #else | |
| // Transform the wavelets for each channel to the output image (not threaded) | |
| TransformInverseFrameToRow16u(decoder, transform_array, frame, num_channels, | |
| (PIXEL16U *)output, pitch, info, | |
| &decoder->scratch, chroma_offset, precision); | |
| ConvertRow16uToOutput(decoder, frame, num_channels, output, pitch, | |
| info, chroma_offset, precision); | |
| //Old code converts 4:2:2 directly to RGBA (single threaded.) | |
| //TransformInverseFrameToBuffer(transform_array, frame, num_channels, output, pitch, | |
| // info, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| return CODEC_ERROR_OKAY; | |
| default: | |
| // else Return the error code for unsupported output format | |
| break; | |
| } | |
| } | |
| } | |
| // The output format is not supported by this routine | |
| error = CODEC_ERROR_UNSUPPORTED_FORMAT; | |
| return error; | |
| } | |
| // Routines for converting the new encoded formats to the requested output format | |
| CODEC_ERROR ReconstructSampleFrameRGB444ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) | |
| { | |
| CODEC_ERROR error = CODEC_ERROR_OKAY; | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| FRAME_INFO *info = &decoder->frame; | |
| CODEC_STATE *codec = &decoder->codec; | |
| int num_channels = codec->num_channels; | |
| //int progressive = codec->progressive; | |
| TRANSFORM **transform_array = decoder->transform; | |
| //IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| //IMAGE *wavelet; | |
| //int wavelet_width; | |
| //int wavelet_height; | |
| int decoded_width = 0; | |
| int decoded_height = 0; | |
| int resolution = info->resolution; | |
| //int chroma_offset = decoder->codec.chroma_offset; | |
| //int decoded_scale; | |
| #if _ALLOCATOR | |
| ALLOCATOR *allocator = decoder->allocator; | |
| #endif | |
| //TODO: Eliminate use of the chroma offset | |
| if (decoder == NULL) { | |
| return CODEC_ERROR_INVALID_ARGUMENT; | |
| } | |
| // This routine should only be called for progressive frames | |
| assert(codec->progressive); | |
| // The decoder can decode a video sample without returning a frame | |
| if (output == NULL || pitch == 0) { | |
| return CODEC_ERROR_OKAY; | |
| } | |
| // Does this frame have to be reconstructed? | |
| if ((decoder->flags & DECODER_FLAGS_RENDER) == 0) { | |
| return CODEC_ERROR_OKAY; | |
| } | |
| // Check that the requested frame is within the limits of the group of frames | |
| assert(0 <= frame && frame < decoder->gop_length); | |
| // Check that the frame resolution is valid | |
| assert(IsValidFrameResolution(resolution)); | |
| if (!IsValidFrameResolution(resolution)) { | |
| return CODEC_ERROR_RESOLUTION; | |
| } | |
| // Compute the decoded width and height | |
| ComputeOutputDimensions(decoder, frame, &decoded_width, &decoded_height); | |
| assert(decoded_width > 0 && decoded_height > 0); | |
| if (info->format == DECODED_FORMAT_RGB24 || info->format == DECODED_FORMAT_RGB32) | |
| { | |
| output += (info->height-1)*pitch; | |
| pitch = -pitch; | |
| } | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| IMAGE *wavelet = transform[0]->wavelet[frame]; | |
| int band = 0; | |
| fprintf(logfile, "Luminance wavelet, frame: %d, band: %d\n", frame, band); | |
| DumpArray16s("Lowpass Band", wavelet->band[band], wavelet->width, wavelet->height, wavelet->pitch, logfile); | |
| } | |
| #endif | |
| // Check that the requested frame is large enough to hold the decoded frame | |
| #if (0 && DEBUG) | |
| //if (! (info->width >= decoded_width)) | |
| { | |
| if (logfile) { | |
| //fprintf(logfile, "Requested frame not large enough to hold decoded frame: %d < %d\n", info->width, decoded_width); | |
| fprintf(logfile, "Output frame width: %d, decoded frame width: %d\n", info->width, decoded_width); | |
| } | |
| } | |
| #endif | |
| assert(info->width >= decoded_width); | |
| if (!(info->width >= decoded_width)) { | |
| return CODEC_ERROR_FRAMESIZE; | |
| } | |
| // assert((info->height+7)/8 >= (decoded_height+7)/8); | |
| // if (!(info->height+7)/8 >= (decoded_height+7)/8) { | |
| // return CODEC_ERROR_FRAMESIZE; | |
| // } | |
| START(tk_convert); | |
| if (resolution == DECODED_RESOLUTION_LOWPASS_ONLY) | |
| { | |
| //int precision = codec->precision; | |
| int scale = 13; | |
| int channel; | |
| IMAGE *lowpass_images[TRANSFORM_MAX_CHANNELS]; | |
| int chroma_offset = decoder->codec.chroma_offset; | |
| //DAN20081203 -- fix for 444 decodes in AE32-bit float | |
| decoder->frame.white_point = 16; | |
| //decoder->frame.signed_pixels = 0; | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| lowpass_images[channel] = transform_array[channel]->wavelet[5]; | |
| if(lowpass_images[channel] == NULL) // therefore IntreFrame compressed. | |
| { | |
| scale = 12; | |
| lowpass_images[channel] = transform_array[channel]->wavelet[2]; | |
| } | |
| } | |
| CopyLowpass16sToBuffer(decoder, lowpass_images, num_channels, output, pitch, info, chroma_offset, | |
| scale, decoder->codec.encoded_format, decoder->frame.white_point); | |
| } | |
| else | |
| // Quarter resolution | |
| if (resolution == DECODED_RESOLUTION_QUARTER) | |
| { | |
| // Output quarter resolution for the two frame GOP | |
| int precision = codec->precision; | |
| // Reconstruct the frame to quarter resolution | |
| ReconstructQuarterFrame(decoder, num_channels, frame, output, pitch, | |
| info, &decoder->scratch, precision); | |
| // Quarter resolution one frame GOP is handled in DecodeSampleIntraFrame | |
| } | |
| else | |
| // Half resolution | |
| if (resolution == DECODED_RESOLUTION_HALF) | |
| { | |
| IMAGE *wavelet_array[TRANSFORM_MAX_CHANNELS]; | |
| int precision = codec->precision; | |
| int chroma_offset = 0; | |
| int channel; | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| #if _THREADED | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| mailbox->framenum = frame; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| decoder->RGBFilterBufferPhase = 1; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| decoder->RGBFilterBufferPhase = 0; | |
| } | |
| #endif | |
| } | |
| else | |
| { | |
| //DAN20081203 -- fix for 444 decodes in AE32-bit float | |
| decoder->frame.white_point = 16; | |
| //decoder->frame.signed_pixels = 0; | |
| // Get the first level wavelet in each channel | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| wavelet_array[channel] = transform_array[channel]->wavelet[frame]; | |
| } | |
| // Pack the pixels from the lowpass band in each channel into the output buffer | |
| CopyLowpassRGB444ToBuffer(decoder, wavelet_array, num_channels, output, pitch, | |
| info, chroma_offset, precision); | |
| } | |
| } | |
| // Full resolution or half horizontal | |
| else | |
| { | |
| int chroma_offset = 0; | |
| int precision = codec->precision; | |
| // Reconstruct the output frame from a full resolution decode | |
| //assert(resolution == DECODED_RESOLUTION_FULL); | |
| if(decoder->use_active_metadata_decoder) | |
| { | |
| int frame_size, channels = 3; | |
| if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format)) | |
| channels = 4; | |
| frame_size = info->width * info->height * channels * 2; | |
| if(decoder->RGBFilterBuffer16==NULL || decoder->RGBFilterBufferSize < frame_size) | |
| { | |
| #if _ALLOCATOR | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| FreeAligned(decoder->allocator, decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| } | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)AllocAligned(allocator, frame_size, 16); | |
| #else | |
| if(decoder->RGBFilterBuffer16) | |
| { | |
| MEMORY_ALIGNED_FREE(decoder->RGBFilterBuffer16); | |
| decoder->RGBFilterBuffer16 = NULL; | |
| } | |
| decoder->RGBFilterBuffer16 = (PIXEL16U *)MEMORY_ALIGNED_ALLOC(frame_size, 16); | |
| #endif | |
| assert(decoder->RGBFilterBuffer16 != NULL); | |
| if (! (decoder->RGBFilterBuffer16 != NULL)) { | |
| return CODEC_ERROR_MEMORY_ALLOC; | |
| } | |
| decoder->RGBFilterBufferSize = frame_size; | |
| } | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToRow16u(decoder, frame, num_channels, | |
| (uint8_t *)decoder->RGBFilterBuffer16, info->width * channels * 2, | |
| info, chroma_offset, precision); | |
| #else | |
| // Decode that last transform to rows of Bayer data (one row per channel) | |
| TransformInverseSpatialToRow16u(transform_array, frame, num_channels, | |
| (uint8_t *)decoder->RGBFilterBuffer16, info->width * channels * 2, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| #if _THREADED | |
| { | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->jobType = JOB_TYPE_OUTPUT; | |
| decoder->RGBFilterBufferPhase = 1; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, info->height); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| decoder->RGBFilterBufferPhase = 0; | |
| } | |
| #endif | |
| } | |
| else | |
| { | |
| //DAN20081203 -- fix for 444 decodes in AE32-bit float | |
| decoder->frame.white_point = 16; | |
| //decoder->frame.signed_pixels = 0; | |
| switch (info->format) | |
| { | |
| case DECODED_FORMAT_B64A: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2B64A); | |
| #else | |
| TransformInverseRGB444ToB64A(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| break; | |
| case DECODED_FORMAT_YU64: //TODO : Threading | |
| TransformInverseRGB444ToYU64(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| break; | |
| case DECODED_FORMAT_RGB24: | |
| case DECODED_FORMAT_RGB24_INVERTED: | |
| case DECODED_FORMAT_RGB32: | |
| case DECODED_FORMAT_RGB32_INVERTED://TODO, needs to be threaded. WIP | |
| TransformInverseRGB444ToRGB32(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| break; | |
| case DECODED_FORMAT_RG48: | |
| case DECODED_FORMAT_RG64: //TODO, needs to be threaded. WIP | |
| TransformInverseRGB444ToRGB48(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| break; | |
| case DECODED_FORMAT_R210: | |
| case DECODED_FORMAT_DPX0: | |
| case DECODED_FORMAT_RG30: | |
| case DECODED_FORMAT_AR10: | |
| case DECODED_FORMAT_AB10: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2RG30); | |
| #else | |
| TransformInverseRGB444ToRGB48(transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision); | |
| #endif | |
| break; | |
| case DECODED_FORMAT_YUYV: | |
| case DECODED_FORMAT_UYVY: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2YUV); | |
| #else | |
| TransformInverseSpatialYUV422ToOutput(decoder, transform_array, frame, num_channels, output, pitch, | |
| info, &decoder->scratch, chroma_offset, precision, | |
| InvertHorizontalStripRGB16sToPackedYUV8u); | |
| #endif | |
| break; | |
| case DECODED_FORMAT_R408: | |
| case DECODED_FORMAT_V408: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGBA2YUVA); | |
| #else | |
| assert(0); | |
| #endif | |
| break; | |
| case DECODED_FORMAT_YR16: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2YR16); | |
| #else | |
| assert(0);// missing non-threaded version | |
| #endif | |
| break; | |
| case DECODED_FORMAT_V210: | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2v210); | |
| #else | |
| assert(0);// missing non-threaded version | |
| #endif | |
| break; | |
| case DECODED_FORMAT_CbYCrY_8bit: // DECODED_FORMAT_CT_UCHAR | |
| #if _THREADED | |
| TransformInverseSpatialUniversalThreadedToOutput(decoder, frame, num_channels, | |
| output, pitch, | |
| info, chroma_offset, precision, | |
| InvertHorizontalStrip16sRGB2YUV); | |
| #else | |
| assert(0);// missing non-threaded version | |
| #endif | |
| break; | |
| //TODO: Add code to handle other Avid pixel formats | |
| case DECODED_FORMAT_CbYCrY_16bit: // DECODED_FORMAT_CT_SHORT | |
| case DECODED_FORMAT_CbYCrY_10bit_2_8: // DECODED_FORMAT_CT_10Bit_2_8 | |
| case DECODED_FORMAT_CbYCrY_16bit_2_14: // DECODED_FORMAT_CT_SHORT_2_14 | |
| case DECODED_FORMAT_CbYCrY_16bit_10_6: // DECODED_FORMAT_CT_USHORT_10_6 | |
| assert(0); | |
| break; | |
| default: | |
| #if (1 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Invalid decoded format: %d\n", info->format); | |
| } | |
| #endif | |
| assert(0); | |
| error = CODEC_ERROR_INVALID_FORMAT; | |
| break; | |
| } | |
| } | |
| } | |
| STOP(tk_convert); | |
| return error; | |
| } | |
| // Convert 16-bit signed lowpass data into the requested output format | |
// Convert the 16-bit signed lowpass (wavelet) bands of each channel into the
// requested output format for a half-resolution RGB 4:4:4 decode.
//
// decoder       - decoder state (scratch buffer and active-metadata settings are read)
// image_array   - per-channel wavelet images; [0]=G, [1]=R, [2]=B, and [3]=A
//                 when the encoded format carries alpha (see default case below)
// num_channels  - number of channels in image_array
// output_buffer - destination frame buffer
// output_pitch  - distance between output rows in bytes
// info          - output frame dimensions, pixel format, and colorspace
// chroma_offset - unused here; kept for signature consistency with siblings
// precision     - encoded precision used to compute the scale shift
void CopyLowpassRGB444ToBuffer(DECODER *decoder, IMAGE *image_array[], int num_channels,
							   uint8_t *output_buffer, int32_t output_pitch,
							   FRAME_INFO *info, int chroma_offset,
							   int precision)
{
	bool inverted = false;
	int output_width = info->width;
	int output_height = info->height;
	int format = info->format;

	// Left shift to scale the pixels to 16 bits minus the shift already in the lowpass values
	const int shift = 16 - precision - PRESCALE_LUMA;

	START(tk_convert);

#if 0
	// Fill the output buffer with blank values
	EraseOutputBuffer(output_buffer, info->width, info->height, output_pitch, info->format);
#endif

	// Determine the type of conversion
	switch (info->format)
	{
	case DECODED_FORMAT_RGB24:
	case DECODED_FORMAT_RGB32:
		inverted = true;
		// Intentional fallthrough: the non-inverted RGB formats set the
		// inverted flag and then share the RGB conversion path below
	case DECODED_FORMAT_RGB24_INVERTED:
	case DECODED_FORMAT_RGB32_INVERTED:
	case DECODED_FORMAT_B64A:
	case DECODED_FORMAT_R210:
	case DECODED_FORMAT_DPX0:
	case DECODED_FORMAT_RG30:
	case DECODED_FORMAT_AR10:
	case DECODED_FORMAT_AB10:
	case DECODED_FORMAT_RG48:
	case DECODED_FORMAT_RG64: //WIP
		// All RGB-family outputs are produced by one conversion routine that
		// handles orientation (inverted flag) and bit depth (shift) itself
		ConvertLowpassRGB444ToRGB(image_array, output_buffer, output_width, output_height,
								  output_pitch, format, inverted, shift, num_channels);
		break;

	case DECODED_FORMAT_YUYV:
	case DECODED_FORMAT_UYVY:
	{
		// Channel order in the array is G, R, B (G carries luma detail)
		IMAGE *g_image = image_array[0];
		IMAGE *r_image = image_array[1];
		IMAGE *b_image = image_array[2];

		// NOTE(review): these compare info->format against COLOR_FORMAT_*
		// constants while the case labels use DECODED_FORMAT_* -- presumably
		// the two enumerations share values for YUYV/UYVY; confirm in color.h
		if (info->format == COLOR_FORMAT_YUYV)
		{
			ConvertRGB2YUV(r_image->band[0], g_image->band[0], b_image->band[0],
						   r_image->pitch, g_image->pitch, b_image->pitch,
						   output_buffer, output_pitch,
						   output_width, output_height, 14,
						   info->colorspace, info->format);
		}
		else if (info->format == COLOR_FORMAT_UYVY)
		{
			ConvertRGB2UYVY(r_image->band[0], g_image->band[0], b_image->band[0],
							r_image->pitch, g_image->pitch, b_image->pitch,
							output_buffer, output_pitch,
							output_width, output_height, 14,
							info->colorspace, info->format);
		}
	}
	break;

	default:
	{
		// Generic path: gather one planar scanline (R,G,B[,A] back to back) in
		// scratch memory, then let the active-metadata converter emit the row
		int y;
		IMAGE *g_image = image_array[0];
		IMAGE *r_image = image_array[1];
		IMAGE *b_image = image_array[2];
		IMAGE *a_image = image_array[3];	// only dereferenced when alpha is output

		// Scanline staging area borrowed from the decoder scratch space
		unsigned short *scanline = (unsigned short *)decoder->scratch.free_ptr;
		//unsigned short *scanline2 = scanline + output_width*3;
		uint8_t *newline = (uint8_t *)output_buffer;

		unsigned short *Rptr,*Gptr,*Bptr,*Aptr = NULL;
		Rptr = (unsigned short *)r_image->band[0];
		Gptr = (unsigned short *)g_image->band[0];
		Bptr = (unsigned short *)b_image->band[0];

		if(decoder->codec.encoded_format == ENCODED_FORMAT_RGBA_4444 && ALPHAOUTPUT(decoder->frame.format))
		{
			// Four-channel (RGBA) variant
			Aptr = (unsigned short *)a_image->band[0];
			for(y=0; y<output_height; y++)
			{
				int flags = (ACTIVEMETADATA_PLANAR);
				int whitebitdepth = 14;

				// Pack the four planes into one contiguous planar scanline
				memcpy(scanline, Rptr, info->width*2);
				memcpy(scanline+info->width, Gptr, info->width*2);
				memcpy(scanline+info->width*2, Bptr, info->width*2);
				memcpy(scanline+info->width*3, Aptr, info->width*2);

				// Advance the band pointers by one row (pitch is in bytes)
				Rptr += r_image->pitch/2;
				Gptr += g_image->pitch/2;
				Bptr += b_image->pitch/2;
				Aptr += a_image->pitch/2;

				Convert4444LinesToOutput(decoder, info->width, 1, y, scanline,
										 newline, output_pitch, info->format, whitebitdepth, flags);

				newline += output_pitch;
			}
		}
		else
		{
			// Three-channel (RGB) variant
			for(y=0; y<output_height; y++)
			{
				int flags = (ACTIVEMETADATA_PLANAR);
				int whitebitdepth = 14;

				// Pack the three planes into one contiguous planar scanline
				memcpy(scanline, Rptr, info->width*2);
				memcpy(scanline+info->width, Gptr, info->width*2);
				memcpy(scanline+info->width*2, Bptr, info->width*2);

				// Advance the band pointers by one row (pitch is in bytes)
				Rptr += r_image->pitch/2;
				Gptr += g_image->pitch/2;
				Bptr += b_image->pitch/2;

				ConvertLinesToOutput(decoder, info->width, 1, y, scanline,
									 newline, output_pitch, info->format, whitebitdepth, flags);

				newline += output_pitch;
			}
		}
	}
	//assert(0);
	break;
	}

	STOP(tk_convert);
}
| #if _THREADED | |
| // Threaded inverse transform using the new threads API | |
| void TransformInverseSpatialThreadedYUV422ToBuffer(DECODER *decoder, int frame_index, int num_channels, | |
| uint8_t *output, int pitch, FRAME_INFO *info, | |
| int chroma_offset, int precision) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| //TODO: Add support for more output formats | |
| int format = DECODED_FORMAT_RGB32; | |
| // The upper and lower spatial transforms only share the middle rows | |
| int transform_height = (((info->height + 7) / 8) * 8) / 2; | |
| int middle_row_count = transform_height; | |
| // Data structure for passing information to the worker threads | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| // Inverse horizontal filter that outputs the desired format | |
| HorizontalInverseFilterOutputProc horizontal_filter_proc; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Choose the correct inverse horizontal filter for the output format | |
| switch (format) | |
| { | |
| case DECODED_FORMAT_RGB32: | |
| horizontal_filter_proc = InvertHorizontalStripYUV16sToPackedRGB32; | |
| break; | |
| default: | |
| assert(0); | |
| return; | |
| } | |
| // Post a message to the mailbox | |
| mailbox->horizontal_filter_proc = horizontal_filter_proc; | |
| mailbox->frame = frame_index; | |
| mailbox->num_channels = num_channels; | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->chroma_offset = chroma_offset; | |
| mailbox->precision = precision; | |
| mailbox->jobType = JOB_TYPE_WAVELET; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, middle_row_count); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| #if (1 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "All worker threads signalled done\n"); | |
| } | |
| #endif | |
| } | |
| // Threaded inverse transform using the new threads API | |
| // Convert RGB RGBA or BAYER (4 channel) data to a 16-bit planar format | |
| void TransformInverseSpatialUniversalThreadedToRow16u(DECODER *decoder, int frame_index, int num_channels, | |
| uint8_t *output, int pitch, FRAME_INFO *info, | |
| int chroma_offset, int precision) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| // The upper and lower spatial transforms only share the middle rows | |
| int transform_height = (((info->height + 7) / 8) * 8) / 2; | |
| int middle_row_count = transform_height; | |
| // Data structure for passing information to the worker threads | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| // Inverse horizontal filter that outputs the desired format | |
| HorizontalInverseFilterOutputProc horizontal_filter_proc; | |
| horizontal_filter_proc = InvertHorizontalStrip16sToRow16uPlanar; | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->horizontal_filter_proc = horizontal_filter_proc; | |
| mailbox->frame = frame_index; | |
| mailbox->num_channels = num_channels; | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->chroma_offset = chroma_offset; | |
| mailbox->precision = precision; | |
| mailbox->jobType = JOB_TYPE_WAVELET; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, middle_row_count); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| // Threaded inverse transform using the new threads API | |
| // Convert RGB RGBA or BAYER (4 channel) data to a 16-bit planar format | |
| void TransformInverseSpatialUniversalThreadedToOutput( | |
| DECODER *decoder, int frame_index, int num_channels, | |
| uint8_t *output, int pitch, FRAME_INFO *info, | |
| int chroma_offset, int precision, | |
| HorizontalInverseFilterOutputProc horizontal_filter_proc) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| // The upper and lower spatial transforms only share the middle rows | |
| int transform_height = (((info->height + 7) / 8) * 8) / 2; | |
| int middle_row_count = transform_height; | |
| // Data structure for passing information to the worker threads | |
| WORKER_THREAD_DATA *mailbox = &decoder->worker_thread.data; | |
| // Inverse horizontal filter that outputs the desired format | |
| #if _DELAY_THREAD_START | |
| if(decoder->worker_thread.pool.thread_count == 0) | |
| { | |
| CreateLock(&decoder->worker_thread.lock); | |
| // Initialize the pool of transform worker threads | |
| ThreadPoolCreate(&decoder->worker_thread.pool, | |
| decoder->thread_cntrl.capabilities >> 16/*cpus*/, | |
| WorkerThreadProc, | |
| decoder); | |
| } | |
| #endif | |
| // Post a message to the mailbox | |
| mailbox->horizontal_filter_proc = horizontal_filter_proc; | |
| mailbox->frame = frame_index; | |
| mailbox->num_channels = num_channels; | |
| mailbox->output = output; | |
| mailbox->pitch = pitch; | |
| memcpy(&mailbox->info, info, sizeof(FRAME_INFO)); | |
| mailbox->chroma_offset = chroma_offset; | |
| mailbox->precision = precision; | |
| mailbox->jobType = JOB_TYPE_WAVELET; | |
| // Set the work count to the number of rows to process | |
| ThreadPoolSetWorkCount(&decoder->worker_thread.pool, middle_row_count); | |
| // Start the transform worker threads | |
| ThreadPoolSendMessage(&decoder->worker_thread.pool, THREAD_MESSAGE_START); | |
| // Wait for all of the worker threads to finish | |
| ThreadPoolWaitAllDone(&decoder->worker_thread.pool); | |
| } | |
| // Routines for the worker threads that use the new threads API | |
| void TransformInverseSpatialSectionToOutput(DECODER *decoder, int thread_index, | |
| int frame_index, int num_channels, | |
| uint8_t *output_buffer, int output_pitch, FRAME_INFO *info, | |
| int chroma_offset, int precision, | |
| HorizontalInverseFilterOutputProc horizontal_filter_proc) | |
| { | |
| #if (1 && DEBUG) | |
| FILE *logfile = decoder->logfile; | |
| #endif | |
| TRANSFORM **transform = decoder->transform; | |
| const SCRATCH *scratch = &decoder->scratch; | |
| PIXEL *lowlow_band[CODEC_MAX_CHANNELS]; | |
| PIXEL *lowhigh_band[CODEC_MAX_CHANNELS]; | |
| PIXEL *highlow_band[CODEC_MAX_CHANNELS]; | |
| PIXEL *highhigh_band[CODEC_MAX_CHANNELS]; | |
| int lowlow_pitch[CODEC_MAX_CHANNELS]; | |
| int lowhigh_pitch[CODEC_MAX_CHANNELS]; | |
| int highlow_pitch[CODEC_MAX_CHANNELS]; | |
| int highhigh_pitch[CODEC_MAX_CHANNELS]; | |
| int channel_width[CODEC_MAX_CHANNELS]; | |
| uint8_t *output_row_ptr; | |
| uint8_t *plane_array[TRANSFORM_MAX_CHANNELS]; | |
| int plane_pitch[TRANSFORM_MAX_CHANNELS]; | |
| int output_width = info->width; | |
| int output_height = info->height; | |
| int half_height = output_height/2; | |
| int luma_band_width; | |
| ROI strip; | |
| char *bufptr; | |
| int last_row; | |
| int last_display_row; | |
| int last_line; | |
| int channel; | |
| int row; | |
| int odd_display_lines = 0; | |
| THREAD_ERROR error; | |
| // Push the scratch space state to allocate a new section | |
| char *buffer = scratch->free_ptr; | |
| size_t buffer_size = scratch->free_size; | |
| //TODO: Replace uses of buffer variables with calls to the scratch space API | |
| // This version is for 16-bit pixels | |
| assert(sizeof(PIXEL) == 2); | |
| // Must have a valid inverse horizontal filter | |
| assert(horizontal_filter_proc != NULL); | |
| // Check for enough space in the local array allocations | |
| // assert(num_channels <= CODEC_NUM_CHANNELS); | |
| assert(num_channels <= TRANSFORM_MAX_CHANNELS); | |
| // Divide the buffer space between the four threads | |
| buffer_size /= decoder->worker_thread.pool.thread_count; // used to assume max of 4 | |
| buffer += buffer_size * thread_index; | |
| // Round the buffer pointer up to the next cache line | |
| buffer_size -= (_CACHE_LINE_SIZE - ((uintptr_t)buffer & _CACHE_LINE_MASK)); | |
| bufptr = (char *)ALIGN(buffer, _CACHE_LINE_SIZE); | |
| // Allocate buffer space for the output rows from each channel | |
| for (channel = 0; channel < num_channels; channel++) | |
| { | |
| // Get the row width for this channel | |
| IMAGE *wavelet = transform[channel]->wavelet[frame_index]; | |
| int width = wavelet->width; | |
| int height = wavelet->height; | |
| //int pitch = wavelet->pitch; | |
| size_t channel_buffer_size; | |
| // Compute the width and pitch for the output rows stored in this buffer | |
| int buffer_width = 2 * width; | |
| int buffer_height = 2; | |
| int buffer_pitch = ALIGN16(buffer_width); | |
| // Compute the total allocation for this channel | |
| channel_buffer_size = buffer_height * buffer_pitch; | |
| // Check that there is enough space available | |
| assert(channel_buffer_size <= buffer_size); | |
| // Allocate the buffer for this channel | |
| plane_array[channel] = (uint8_t *)bufptr; | |
| // Remember the pitch for rows in this channel | |
| plane_pitch[channel] = buffer_pitch; | |
| // Advance the buffer pointer past the allocated space for this channel | |
| bufptr += channel_buffer_size; | |
| // Reduce the amount of space remaining in the buffer | |
| buffer_size -= channel_buffer_size; | |
| // The dimensions of the output image are the same as the luma channel | |
| if (channel == 0) | |
| { | |
| strip.width = buffer_width; | |
| strip.height = buffer_height; | |
| last_row = height; | |
| //DAN20050606 Added to fix issue with non-div by 8 heihts. | |
| last_display_row = (info->height+1)/2; // DAN20090215 -- fix for odd display lines. | |
| odd_display_lines = info->height & 1; | |
| // Remember the width of the wavelet bands for luma | |
| luma_band_width = width; | |
| } | |
| // Save the bands per channel for routines that process all channels at once | |
| lowlow_band[channel] = wavelet->band[0]; | |
| lowhigh_band[channel] = wavelet->band[1]; | |
| highlow_band[channel] = wavelet->band[2]; | |
| highhigh_band[channel] = wavelet->band[3]; | |
| lowlow_pitch[channel] = wavelet->pitch; | |
| lowhigh_pitch[channel] = wavelet->pitch; | |
| highlow_pitch[channel] = wavelet->pitch; | |
| highhigh_pitch[channel] = wavelet->pitch; | |
| // Remember the width of the wavelet for this channel | |
| channel_width[channel] = width; | |
| } | |
| // Use the remaining buffer space for intermediate results | |
| buffer_size -= (_CACHE_LINE_SIZE - ((uintptr_t)bufptr & _CACHE_LINE_MASK)); | |
| buffer = (char *)ALIGN(bufptr, _CACHE_LINE_SIZE); | |
| if (last_row == last_display_row) | |
| { | |
| last_line = half_height - 1; | |
| } | |
| else | |
| { | |
| last_line = half_height; | |
| } | |
| if(odd_display_lines) | |
| last_line++; | |
| if (thread_index == TRANSFORM_WORKER_TOP_THREAD) | |
| { | |
| // Process the first row | |
| row = 0; | |
| output_row_ptr = output_buffer; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Thread: %d, processing row: %d\n", thread_index, row); | |
| } | |
| #endif | |
| // Process the first row using special border filters for the top row | |
| InvertSpatialTopRow16sToOutput(decoder, thread_index, lowlow_band, lowlow_pitch, | |
| lowhigh_band, lowhigh_pitch, | |
| highlow_band, highlow_pitch, | |
| highhigh_band, highhigh_pitch, | |
| output_row_ptr, output_pitch, | |
| output_width, info->format, info->colorspace, | |
| row, channel_width, | |
| (PIXEL *)buffer, buffer_size, | |
| precision, | |
| horizontal_filter_proc); | |
| } | |
| if (thread_index == TRANSFORM_WORKER_BOTTOM_THREAD || decoder->worker_thread.pool.thread_count == 1) | |
| { | |
| if(last_row == last_display_row) //DAN20071218 -- Added as old 1080 RAW files would crash | |
| { | |
| int pitch = output_pitch; | |
| // Process the last row | |
| row = last_row - 1; | |
| if(decoder->channel_decodes > 1 && decoder->frame.format == DECODED_FORMAT_YUYV) // 3d work | |
| if(decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC) | |
| pitch >>= 1; | |
| // Begin filling the last output row with results | |
| output_row_ptr = output_buffer + row * 2 * pitch; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Thread: %d, processing row: %d\n", thread_index, row); | |
| } | |
| #endif | |
| // Process the last row using special border filters for the bottom row | |
| if(decoder->channel_decodes > 1 && decoder->frame.format == DECODED_FORMAT_YUYV) | |
| if(decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC || decoder->channel_blend_type == BLEND_LINE_INTERLEAVED) // 3d Work TODO Fix | |
| output_row_ptr -= output_pitch; | |
| InvertSpatialBottomRow16sToOutput(decoder, thread_index, lowlow_band, lowlow_pitch, | |
| lowhigh_band, lowhigh_pitch, | |
| highlow_band, highlow_pitch, | |
| highhigh_band, highhigh_pitch, | |
| output_row_ptr, output_pitch, | |
| output_width, info->format, info->colorspace, | |
| row, channel_width, | |
| (PIXEL *)buffer, buffer_size, | |
| precision, odd_display_lines, | |
| horizontal_filter_proc); | |
| } | |
| } | |
| // Loop until all of the middle rows have been processed | |
| for (;;) | |
| { | |
| int work_index; | |
| int row; | |
| // Wait for one row from each channel to process | |
| error = PoolThreadWaitForWork(&decoder->worker_thread.pool, &work_index, thread_index); | |
| // Is there another row to process? | |
| if (error == THREAD_ERROR_OKAY) | |
| { | |
| int pitch = output_pitch; | |
| // Compute the next row to process from the work index | |
| row = work_index + 1; | |
| if(decoder->channel_decodes > 1 && decoder->frame.format == DECODED_FORMAT_YUYV) // 3d work | |
| if(decoder->channel_blend_type == BLEND_STACKED_ANAMORPHIC) // stacked | |
| pitch >>= 1; | |
| // Compute the output row corresponding to this row index | |
| output_row_ptr = output_buffer + row * 2 * pitch; | |
| } | |
| else | |
| { | |
| // No more work to do | |
| return; | |
| } | |
| // Is the row inside the top and bottom border? | |
| if (0 < row && row < last_line) | |
| { | |
| int outputlines = 2; | |
| #if (0 && DEBUG) | |
| if (logfile) { | |
| fprintf(logfile, "Thread: %d, processing row: %d\n", thread_index, row); | |
| } | |
| #endif | |
| if(odd_display_lines && row==last_line-1) | |
| { | |
| outputlines = 1; | |
| } | |
| // Process the middle row using the normal wavelet filters | |
| InvertSpatialMiddleRow16sToOutput(decoder, thread_index, | |
| lowlow_band, lowlow_pitch, | |
| lowhigh_band, lowhigh_pitch, | |
| highlow_band, highlow_pitch, | |
| highhigh_band, highhigh_pitch, | |
| output_row_ptr, output_pitch, | |
| output_width, info->format, info->colorspace, | |
| row, channel_width, | |
| (PIXEL *)buffer, buffer_size, | |
| precision, | |
| horizontal_filter_proc, | |
| outputlines); | |
| } | |
| } | |
| } | |
| #endif //_THREADED | |
| bool GetTuplet(unsigned char *data, int datasize, | |
| unsigned short findtag, unsigned short *retvalue) | |
| { | |
| bool ret = false; | |
| BITSTREAM myinput, *pinput; | |
| TAGVALUE segment; | |
| TAGWORD tag,value; | |
| int error = 0; | |
| //char t[100]; | |
| InitBitstream(&myinput); | |
| myinput.lpCurrentWord = data; | |
| myinput.nWordsUsed = datasize; | |
| pinput = &myinput; | |
| do | |
| { | |
| bool optional = false; | |
| int chunksize = 0; | |
| // Read the next tag value pair from the bitstream | |
| segment = GetSegment(pinput); | |
| tag = segment.tuple.tag; | |
| value = segment.tuple.value; | |
| // Is this an optional tag? | |
| if (tag < 0) | |
| { | |
| tag = NEG(tag); | |
| optional = true; | |
| } | |
| if(tag & 0x2000) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| chunksize += ((tag&0xff)<<16); | |
| } | |
| else if(tag & 0x4000) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| } | |
| else if(tag == CODEC_TAG_INDEX) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| } | |
| else | |
| { | |
| chunksize = 0; | |
| } | |
| if((int)(tag) <= ((int)CODEC_TAG_LAST_NON_SIZED) || tag & 0x6000) | |
| { | |
| int skip = 1; | |
| error = 0; | |
| if(tag == (int)findtag) | |
| { | |
| *retvalue = value; | |
| ret = true; | |
| break; | |
| } | |
| if((tag & 0xff00) == 0x2200) //sample size | |
| { | |
| chunksize = 0; // don't test against pinput->nWordsUsed, as we might be only reader enough for metadata only. | |
| skip = 0; | |
| } | |
| if((tag & 0xff00) == 0x2300) //uncompressed sample size | |
| { | |
| skip = 1; | |
| } | |
| if((tag & 0xff00) == 0x2100) //level | |
| skip = 0; | |
| if(chunksize) | |
| { | |
| if(chunksize*4 > pinput->nWordsUsed || chunksize < 0) | |
| { | |
| break; | |
| } | |
| if(skip) | |
| { | |
| //unsigned int *iptr = (unsigned int *)pinput->lpCurrentWord; | |
| pinput->lpCurrentWord += chunksize*4; | |
| pinput->nWordsUsed -= chunksize*4; | |
| } | |
| } | |
| } | |
| else | |
| { | |
| error = 1; | |
| } | |
| } while(tag != CODEC_TAG_GROUP_TRAILER && | |
| tag != CODEC_TAG_FRAME_TRAILER && | |
| pinput->nWordsUsed>0 && !error); | |
| return ret; | |
| } | |
| /*! | |
| Copied from metadata.cpp in the cedoc common directory | |
| */ | |
| uint8_t *GetTupletAddr(uint8_t *data, | |
| int datasize, | |
| uint16_t findtag, | |
| int16_t *retvalue) | |
| { | |
| unsigned char *ret = NULL; | |
| BITSTREAM myinput, *pinput; | |
| TAGVALUE segment; | |
| TAGWORD tag,value; | |
| int error = 0; | |
| if (data == NULL || datasize == 0) { | |
| return NULL; | |
| } | |
| //InitBitstream(&myinput); | |
| memset(&myinput, 0, sizeof(BITSTREAM)); | |
| myinput.lpCurrentWord = data; | |
| myinput.nWordsUsed = datasize; | |
| myinput.nBitsFree = BITSTREAM_LONG_SIZE; | |
| pinput = &myinput; | |
| do | |
| { | |
| //BOOL optional = FALSE; | |
| bool optional = false; | |
| int chunksize = 0; | |
| // Read the next tag value pair from the bitstream | |
| segment = GetSegment(pinput); | |
| tag = segment.tuple.tag; | |
| value = segment.tuple.value; | |
| // Is this an optional tag? | |
| if (tag < 0) | |
| { | |
| tag = NEG(tag); | |
| //optional = TRUE; | |
| optional = true; | |
| } | |
| if(tag & 0x2000) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| chunksize += ((tag&0xff)<<16); | |
| } | |
| else if(tag & 0x4000) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| } | |
| else if(tag == CODEC_TAG_INDEX) | |
| { | |
| chunksize = value; | |
| chunksize &= 0xffff; | |
| } | |
| else | |
| { | |
| chunksize = 0; | |
| } | |
| if((int)(tag) <= ((int)CODEC_TAG_LAST_NON_SIZED) || tag & 0x6000) | |
| { | |
| int skip = 1; | |
| error = 0; | |
| if(tag == (int)findtag) | |
| { | |
| *retvalue = value; | |
| ret = pinput->lpCurrentWord; | |
| break; | |
| } | |
| if((tag & 0xff00) == 0x2200) //sample size | |
| { | |
| chunksize = 0; // don't test against pinput->nWordsUsed, as we might be only reader enough for metadata only. | |
| skip = 0; | |
| } | |
| if((tag & 0xff00) == 0x2300) //uncompressed sample size | |
| { | |
| skip = 1; | |
| } | |
| if((tag & 0xff00) == 0x2100) //level | |
| skip = 0; | |
| if(chunksize) | |
| { | |
| if(chunksize*4 > pinput->nWordsUsed || chunksize < 0) | |
| { | |
| break; | |
| } | |
| if(skip) | |
| { | |
| //unsigned int *iptr = (unsigned int *)pinput->lpCurrentWord; | |
| pinput->lpCurrentWord += chunksize*4; | |
| pinput->nWordsUsed -= chunksize*4; | |
| } | |
| } | |
| } | |
| else | |
| { | |
| error = 1; | |
| } | |
| } while(tag != CODEC_TAG_GROUP_TRAILER && | |
| tag != CODEC_TAG_FRAME_TRAILER && | |
| pinput->nWordsUsed>0 && !error); | |
| return ret; | |
| } |