Skip to content

Commit 0b21566

Browse files
committed
swresample: Refactor resample asm and port it to yasm (WIP)
1 parent 3df787a commit 0b21566

File tree

6 files changed

+307
-153
lines changed

6 files changed

+307
-153
lines changed

libswresample/resample.c

Lines changed: 157 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,6 @@
3030
#include "swresample_internal.h"
3131

3232

33-
typedef struct ResampleContext {
34-
const AVClass *av_class;
35-
uint8_t *filter_bank;
36-
int filter_length;
37-
int filter_alloc;
38-
int ideal_dst_incr;
39-
int dst_incr;
40-
int index;
41-
int frac;
42-
int src_incr;
43-
int compensation_distance;
44-
int phase_shift;
45-
int phase_mask;
46-
int linear;
47-
enum SwrFilterType filter_type;
48-
int kaiser_beta;
49-
double factor;
50-
enum AVSampleFormat format;
51-
int felem_size;
52-
int filter_shift;
53-
} ResampleContext;
54-
5533
/**
5634
* 0th order modified bessel function of the first kind.
5735
*/
@@ -195,6 +173,128 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
195173
return 0;
196174
}
197175

176+
/**
 * Dot product of int16 samples with int16 filter taps, producing one
 * rounded and saturated int16 output sample.
 *
 * The sum of products is rounded to nearest and shifted right by 15 bits
 * (matching the filter_shift of 15 used for AV_SAMPLE_FMT_S16P), then
 * clamped to [INT16_MIN, INT16_MAX].
 *
 * The accumulator is 64 bits wide: a single product can reach 2^30, so a
 * 32-bit accumulator overflows (undefined behaviour) after very few
 * full-scale terms and the saturation step would then see a wrapped value.
 *
 * @param source pointer to len int16_t input samples (read only)
 * @param dest   pointer to a single int16_t that receives the result
 * @param filter pointer to len int16_t filter coefficients (read only)
 * @param len    number of sample/coefficient pairs; <= 0 yields 0
 */
static void scalarproduct_int16(const void *source, void *dest, void *filter, int len)
{
    const int16_t *src = source;
    const int16_t *flt = filter;
    int16_t *dst = dest;
    int64_t val = 0;
    int i;

    for (i = 0; i < len; i++) {
        /* each product fits in int32_t; the running sum may not */
        val += src[i] * (int32_t)flt[i];
    }

    /* round to nearest, then drop the 15 fractional bits */
    val = (val + (1 << (15 - 1))) >> 15;

    if (val > INT16_MAX)
        val = INT16_MAX;
    else if (val < INT16_MIN)
        val = INT16_MIN;

    *dst = (int16_t)val;
}
190+
191+
/**
 * Compute two int16 dot products over the same input in one pass.
 *
 * The raw 32-bit sums are stored without rounding, shifting or clipping.
 *
 * @param source  pointer to len int16_t input samples
 * @param filter1 pointer to len int16_t coefficients for the first sum
 * @param filter2 pointer to len int16_t coefficients for the second sum
 * @param val1    pointer to an int32_t receiving sum(source * filter1)
 * @param val2    pointer to an int32_t receiving sum(source * filter2)
 * @param len     number of sample/coefficient pairs
 */
static void scalarproduct_linear_int16(const void *source, void *filter1, void *filter2, void *val1, void *val2, int len)
{
    const int16_t *in = source;
    const int16_t *taps_a = filter1;
    const int16_t *taps_b = filter2;
    int32_t *out_a = val1;
    int32_t *out_b = val2;
    int32_t sum_a = 0;
    int32_t sum_b = 0;
    int n;

    for (n = 0; n < len; n++) {
        const int32_t sample = in[n];

        sum_a += sample * taps_a[n];
        sum_b += sample * taps_b[n];
    }

    *out_a = sum_a;
    *out_b = sum_b;
}
206+
207+
/**
 * Dot product of int32 samples with int32 filter taps, producing one
 * rounded and saturated int32 output sample.
 *
 * Products are accumulated in 64 bits, rounded to nearest, shifted right
 * by 30 bits (the filter_shift used for AV_SAMPLE_FMT_S32P) and clamped
 * to the int32_t range.
 *
 * @param source pointer to len int32_t input samples
 * @param dest   pointer to a single int32_t that receives the result
 * @param filter pointer to len int32_t filter coefficients
 * @param len    number of sample/coefficient pairs
 */
static void scalarproduct_int32(const void *source, void *dest, void *filter, int len)
{
    const int32_t *in = source;
    const int32_t *taps = filter;
    int32_t *out = dest;
    int64_t acc = 0;
    int n;

    for (n = 0; n < len; n++) {
        const int64_t sample = in[n];

        acc += sample * taps[n];
    }

    /* round to nearest and drop the 30 fractional bits */
    acc += 1 << (30 - 1);
    acc >>= 30;

    if (acc > INT32_MAX)
        *out = INT32_MAX;
    else if (acc < INT32_MIN)
        *out = INT32_MIN;
    else
        *out = (int32_t)acc;
}
221+
222+
/**
 * Compute two int32 dot products over the same input in one pass.
 *
 * The raw 64-bit sums are stored without rounding, shifting or clipping.
 *
 * @param source  pointer to len int32_t input samples
 * @param filter1 pointer to len int32_t coefficients for the first sum
 * @param filter2 pointer to len int32_t coefficients for the second sum
 * @param val1    pointer to an int64_t receiving sum(source * filter1)
 * @param val2    pointer to an int64_t receiving sum(source * filter2)
 * @param len     number of sample/coefficient pairs
 */
static void scalarproduct_linear_int32(const void *source, void *filter1, void *filter2, void *val1, void *val2, int len)
{
    const int32_t *in = source;
    const int32_t *taps_a = filter1;
    const int32_t *taps_b = filter2;
    int64_t *out_a = val1;
    int64_t *out_b = val2;
    int64_t sum_a = 0;
    int64_t sum_b = 0;
    int n;

    for (n = 0; n < len; n++) {
        /* widen once so both multiplications are done in 64 bits */
        const int64_t sample = in[n];

        sum_a += sample * taps_a[n];
        sum_b += sample * taps_b[n];
    }

    *out_a = sum_a;
    *out_b = sum_b;
}
237+
238+
/**
 * Dot product of float samples with float filter taps.
 *
 * Terms are summed in index order into a float accumulator and the
 * result is stored unmodified.
 *
 * @param source pointer to len float input samples
 * @param dest   pointer to a single float that receives the result
 * @param filter pointer to len float filter coefficients
 * @param len    number of sample/coefficient pairs
 */
static void scalarproduct_float(const void *source, void *dest, void *filter, int len)
{
    const float *in = source;
    const float *taps = filter;
    float *out = dest;
    float acc = 0;
    int n = 0;

    while (n < len) {
        acc += in[n] * taps[n];
        n++;
    }

    *out = acc;
}
251+
252+
/**
 * Compute two float dot products over the same input in one pass.
 *
 * @param source  pointer to len float input samples
 * @param filter1 pointer to len float coefficients for the first sum
 * @param filter2 pointer to len float coefficients for the second sum
 * @param val1    pointer to a float receiving sum(source * filter1)
 * @param val2    pointer to a float receiving sum(source * filter2)
 * @param len     number of sample/coefficient pairs
 */
static void scalarproduct_linear_float(const void *source, void *filter1, void *filter2, void *val1, void *val2, int len)
{
    const float *in = source;
    const float *taps_a = filter1;
    const float *taps_b = filter2;
    float *out_a = val1;
    float *out_b = val2;
    float sum_a = 0;
    float sum_b = 0;
    int n = 0;

    while (n < len) {
        sum_a += in[n] * taps_a[n];
        sum_b += in[n] * taps_b[n];
        n++;
    }

    *out_a = sum_a;
    *out_b = sum_b;
}
267+
268+
/**
 * Dot product of double samples with double filter taps.
 *
 * Terms are summed in index order into a double accumulator and the
 * result is stored unmodified.
 *
 * @param source pointer to len double input samples
 * @param dest   pointer to a single double that receives the result
 * @param filter pointer to len double filter coefficients
 * @param len    number of sample/coefficient pairs
 */
static void scalarproduct_double(const void *source, void *dest, void *filter, int len)
{
    const double *in = source;
    const double *taps = filter;
    double *out = dest;
    double acc = 0;
    int n = 0;

    while (n < len) {
        acc += in[n] * taps[n];
        n++;
    }

    *out = acc;
}
281+
282+
/**
 * Compute two double dot products over the same input in one pass.
 *
 * @param source  pointer to len double input samples
 * @param filter1 pointer to len double coefficients for the first sum
 * @param filter2 pointer to len double coefficients for the second sum
 * @param val1    pointer to a double receiving sum(source * filter1)
 * @param val2    pointer to a double receiving sum(source * filter2)
 * @param len     number of sample/coefficient pairs
 */
static void scalarproduct_linear_double(const void *source, void *filter1, void *filter2, void *val1, void *val2, int len)
{
    const double *in = source;
    const double *taps_a = filter1;
    const double *taps_b = filter2;
    double *out_a = val1;
    double *out_b = val2;
    double sum_a = 0;
    double sum_b = 0;
    int n = 0;

    while (n < len) {
        sum_a += in[n] * taps_a[n];
        sum_b += in[n] * taps_b[n];
        n++;
    }

    *out_a = sum_a;
    *out_b = sum_b;
}
297+
198298
static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
199299
double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta,
200300
double precision, int cheby){
@@ -216,13 +316,23 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
216316
switch(c->format){
217317
case AV_SAMPLE_FMT_S16P:
218318
c->filter_shift = 15;
319+
c->scalarproduct = scalarproduct_int16;
320+
c->scalarproduct_linear = scalarproduct_linear_int16;
219321
break;
220322
case AV_SAMPLE_FMT_S32P:
221323
c->filter_shift = 30;
324+
c->scalarproduct = scalarproduct_int32;
325+
c->scalarproduct_linear = scalarproduct_linear_int32;
222326
break;
223327
case AV_SAMPLE_FMT_FLTP:
328+
c->filter_shift = 0;
329+
c->scalarproduct = scalarproduct_float;
330+
c->scalarproduct_linear = scalarproduct_linear_float;
331+
break;
224332
case AV_SAMPLE_FMT_DBLP:
225333
c->filter_shift = 0;
334+
c->scalarproduct = scalarproduct_double;
335+
c->scalarproduct_linear = scalarproduct_linear_double;
226336
break;
227337
default:
228338
av_log(NULL, AV_LOG_ERROR, "Unsupported sample format\n");
@@ -259,6 +369,9 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
259369
c->index= -phase_count*((c->filter_length-1)/2);
260370
c->frac= 0;
261371

372+
if (ARCH_X86)
373+
swri_audio_resample_init_x86(c);
374+
262375
return c;
263376
error:
264377
av_freep(&c->filter_bank);
@@ -282,73 +395,30 @@ static int set_compensation(ResampleContext *c, int sample_delta, int compensati
282395
return 0;
283396
}
284397

285-
#define TEMPLATE_RESAMPLE_S16
286-
#include "resample_template.c"
287-
#undef TEMPLATE_RESAMPLE_S16
288-
289-
#define TEMPLATE_RESAMPLE_S32
290-
#include "resample_template.c"
291-
#undef TEMPLATE_RESAMPLE_S32
292-
293-
#define TEMPLATE_RESAMPLE_FLT
294-
#include "resample_template.c"
295-
#undef TEMPLATE_RESAMPLE_FLT
296-
297-
#define TEMPLATE_RESAMPLE_DBL
298-
#include "resample_template.c"
299-
#undef TEMPLATE_RESAMPLE_DBL
300-
301-
// XXX FIXME the whole C loop should be written in asm so this x86 specific code here isnt needed
302-
#if HAVE_MMXEXT_INLINE
303-
304-
#include "x86/resample_mmx.h"
305-
306-
#define TEMPLATE_RESAMPLE_S16_MMX2
307-
#include "resample_template.c"
308-
#undef TEMPLATE_RESAMPLE_S16_MMX2
309-
310-
#if HAVE_SSE_INLINE
311-
#define TEMPLATE_RESAMPLE_FLT_SSE
312-
#include "resample_template.c"
313-
#undef TEMPLATE_RESAMPLE_FLT_SSE
314-
#endif
315-
316-
#if HAVE_SSE2_INLINE
317-
#define TEMPLATE_RESAMPLE_S16_SSE2
318-
#include "resample_template.c"
319-
#undef TEMPLATE_RESAMPLE_S16_SSE2
320-
#endif
321-
322-
#endif // HAVE_MMXEXT_INLINE
323-
324-
static int multiple_resample(ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed){
325-
int i, ret= -1;
326-
int av_unused mm_flags = av_get_cpu_flags();
327-
int need_emms= 0;
328-
329-
for(i=0; i<dst->ch_count; i++){
330-
#if HAVE_MMXEXT_INLINE
331-
#if HAVE_SSE2_INLINE
332-
if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_SSE2)) ret= swri_resample_int16_sse2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
333-
else
334-
#endif
335-
if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_MMX2 )){
336-
ret= swri_resample_int16_mmx2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
337-
need_emms= 1;
338-
} else
339-
#endif
340-
if(c->format == AV_SAMPLE_FMT_S16P) ret= swri_resample_int16(c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
341-
else if(c->format == AV_SAMPLE_FMT_S32P) ret= swri_resample_int32(c, (int32_t*)dst->ch[i], (const int32_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
342-
#if HAVE_SSE_INLINE
343-
else if(c->format == AV_SAMPLE_FMT_FLTP && (mm_flags&AV_CPU_FLAG_SSE))
344-
ret= swri_resample_float_sse (c, (float*)dst->ch[i], (const float*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
345-
#endif
346-
else if(c->format == AV_SAMPLE_FMT_FLTP) ret= swri_resample_float(c, (float *)dst->ch[i], (const float *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
347-
else if(c->format == AV_SAMPLE_FMT_DBLP) ret= swri_resample_double(c,(double *)dst->ch[i], (const double *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
398+
static int multiple_resample(ResampleContext *c, AudioData *dest, int dst_size, AudioData *source, int src_size, int *consumed){
399+
int j, dst_index = -1;
400+
401+
if(c->format == AV_SAMPLE_FMT_S16P) {
402+
# define TEMPLATE_RESAMPLE_S16
403+
# include "resample_template.c"
404+
# undef TEMPLATE_RESAMPLE_S16
405+
} else if(c->format == AV_SAMPLE_FMT_S32P) {
406+
# define TEMPLATE_RESAMPLE_S32
407+
# include "resample_template.c"
408+
# undef TEMPLATE_RESAMPLE_S32
409+
} else if(c->format == AV_SAMPLE_FMT_FLTP) {
410+
# define TEMPLATE_RESAMPLE_FLT
411+
# include "resample_template.c"
412+
# undef TEMPLATE_RESAMPLE_FLT
413+
} else if(c->format == AV_SAMPLE_FMT_DBLP) {
414+
# define TEMPLATE_RESAMPLE_DBL
415+
# include "resample_template.c"
416+
# undef TEMPLATE_RESAMPLE_DBL
348417
}
349-
if(need_emms)
418+
if(c->need_emms)
350419
emms_c();
351-
return ret;
420+
421+
return dst_index;
352422
}
353423

354424
static int64_t get_delay(struct SwrContext *s, int64_t base){

0 commit comments

Comments
 (0)